sqry_core/graph/unified/rebuild/rebuild_graph.rs
1//! [A2 §H] `RebuildGraph` + `clone_for_rebuild` + `finalize()` — Gate 0c.
2//!
3//! **This module is feature-gated.** Every item lives behind
4//! `#[cfg(feature = "rebuild-internals")]`; only `sqry-daemon` is
5//! whitelisted to enable the feature (see
6//! `sqry-core/tests/rebuild_internals_whitelist.rs` and the plan at
7//! `docs/superpowers/plans/2026-03-19-sqryd-daemon.md` §H "Placement
8//! and feature gate").
9//!
10//! # What this module delivers
11//!
12//! Per the plan's A2 §H (pre-implementation Gate 0c, required before
13//! any `clone_for_rebuild` caller in Task 4 Step 4 can exist):
14//!
15//! 1. [`sqry_graph_fields!`] — a **single source of truth** macro that
16//! enumerates every field on [`super::super::concurrent::CodeGraph`].
17//! It is invoked in three places by the rest of this module:
18//! - `sqry_graph_fields!(@decl_rebuild)` declares the
19//! [`RebuildGraph`] struct.
20//! - `sqry_graph_fields!(@clone_inner from self)` is expanded
21//! inside [`CodeGraph::clone_for_rebuild`]; the destructure
22//! `let CodeGraph { .. } = self;` is exhaustive, so adding a
23//! field to [`CodeGraph`] without adding it to the macro's
24//! field list is a hard `E0027` compile error.
25//! - `sqry_graph_fields!(@field_names)` emits a `&[&str]` of
26//! every field name, used by the Gate 0d bijection check
27//! (future) and by diagnostics.
28//! 2. [`CodeGraph::clone_for_rebuild`] — deep-copies every Arc-wrapped
29//! field into an owned, rebuild-local [`RebuildGraph`]. The rebuild
30//! dispatcher (Task 4 Step 4) is the only caller.
31//! 3. [`RebuildGraph::finalize`] — the canonical 14-step sequence
32//! spelled out in §H lines 658–707. Every step is either an API
33//! call on an owned component (freeze interner, compact arena,
34//! compact edges, compact indices, compact metadata, compact file
35//! buckets, compact K-list extras), a drain/move of scratch state
36//! (drain tombstones), a derived-state reset (rebuild CSR), a
37//! per-language metadata update (confidence), a scalar bump (epoch),
38//! a struct assembly (`Arc::new` wrapping), or a debug invariant
39//! (bucket bijection + tombstone residue).
40//!
41//! # Why the macro is kind-tagged, not a single callback
42//!
43//! The plan's literal example invokes `sqry_graph_fields!(CodeGraph,
44//! RebuildGraph)` to generate both structs from one sibling macro
45//! call. That shape would require `CodeGraph` to be declared by the
46//! macro — but `CodeGraph` has dozens of `impl` blocks, Clone
47//! semantics, and accessor methods that already destructure the fields
48//! by name on stable Rust. Moving the declaration into a macro
49//! expansion would obscure every one of those sites.
50//!
51//! A callback-based idiom (`sqry_graph_fields!(my_cb!)`) was also
52//! considered; it worked for the struct declaration but collapsed
53//! under `macro_rules!` hygiene constraints for the destructure-and-
54//! construct clone path (the `self` keyword cannot be synthesised by
55//! a macro; passing identifiers across a callback boundary requires
56//! forwarding them through an `:expr` metavar, which loses the
57//! token-level decomposition needed for Arc-vs-scalar dispatch).
58//!
59//! The **kind-tagged single macro** settled on here expands inline at
60//! the call site — `sqry_graph_fields!(@clone_inner from self)` —
61//! so the destructure runs in the caller's hygiene context, the
62//! `self` keyword is the caller's `self`, and the per-field Arc /
63//! scalar / owned dispatch is baked into the macro body without a
64//! secondary callback hop.
65//!
66//! The single-source-of-truth guarantee is the same: adding a field
67//! requires touching (a) the `CodeGraph` struct in
68//! `concurrent/graph.rs`, (b) the field list inside this macro, and
69//! (c) the step in `finalize()` that compacts it. Missing any of
70//! (a) or (b) makes `clone_for_rebuild_inner` fail to compile.
71//!
72//! # Release-build posture
73//!
74//! The debug bucket-bijection and tombstone-residue checks at steps 13
75//! and 14 are gated on `cfg(any(debug_assertions, test))`. Release
76//! builds pay zero overhead. The §E equivalence harness in CI is the
77//! release-time drift backstop (plan §F "Release build semantics").
78
79// Module-level `#[cfg(feature = "rebuild-internals")]` lives on the
80// `pub mod rebuild_graph;` declaration in the parent `mod.rs`; duplicating
81// it here trips the `duplicated_attributes` clippy lint.
82#![allow(clippy::too_many_lines)]
83
84use std::collections::{HashMap, HashSet};
85
86use crate::confidence::ConfidenceMetadata;
87use crate::graph::error::GraphResult;
88use crate::graph::unified::bind::alias::AliasTable;
89use crate::graph::unified::bind::scope::ScopeArena;
90use crate::graph::unified::bind::scope::provenance::ScopeProvenanceStore;
91use crate::graph::unified::bind::shadow::ShadowTable;
92use crate::graph::unified::concurrent::CodeGraph;
93use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
94use crate::graph::unified::node::NodeId;
95use crate::graph::unified::storage::arena::NodeArena;
96use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
97use crate::graph::unified::storage::edge_provenance::EdgeProvenanceStore;
98use crate::graph::unified::storage::indices::AuxiliaryIndices;
99use crate::graph::unified::storage::interner::StringInterner;
100use crate::graph::unified::storage::metadata::NodeMetadataStore;
101use crate::graph::unified::storage::node_provenance::NodeProvenanceStore;
102use crate::graph::unified::storage::registry::FileRegistry;
103use crate::graph::unified::storage::segment::FileSegmentTable;
104
105use super::coverage::NodeIdBearing;
106
107// =====================================================================
108// Single source of truth: field list
109// =====================================================================
110
111/// Single source of truth for the [`CodeGraph`] field list.
112///
113/// This macro takes a **kind-tag** selector (`@decl_rebuild`,
114/// `@clone_inner from ...`, `@field_names`) and emits a tailored
115/// expansion for each consumer. The field-list rows live exactly once,
116/// inside this macro body, so any new field is added in one place:
117/// here.
118///
119/// # Adding / renaming / removing a field on [`CodeGraph`]
120///
121/// 1. Edit the `CodeGraph` struct in
122/// `sqry-core/src/graph/unified/concurrent/graph.rs` (declaration
123/// order matters — match the order here).
124/// 2. Edit the field list below. The `@decl_rebuild` arm auto-mirrors
125/// into `RebuildGraph`; the `@clone_inner` arm auto-mirrors into
126/// `clone_for_rebuild_inner`.
127/// 3. Extend `RebuildGraph::finalize()` with the step that compacts
128/// the new field (and, if it carries `NodeId`s, add a K.A/K.B row
129/// + `NodeIdBearing` impl in `super::coverage`).
130///
131/// Missing step (1) or (2) is a hard `E0027` compile error in the
132/// `let CodeGraph { .. } = ...` destructure inside the `@clone_inner`
133/// arm. Step (3) is enforced by the §F tombstone-residue check
134/// (Gate 0d) and by the §E incremental-vs-full equivalence harness.
135#[macro_export]
136macro_rules! sqry_graph_fields {
137 // -------------------------------------------------------------
138 // Arm @decl_rebuild: emit the `RebuildGraph` struct declaration.
139 // -------------------------------------------------------------
140 (@decl_rebuild) => {
141 /// Owned, rebuild-local mirror of [`CodeGraph`].
142 ///
143 /// Each field stores a value type rather than an [`Arc`], so
144 /// the rebuild task can mutate freely without touching the
145 /// live published graph. The only path to produce a
146 /// publishable [`CodeGraph`] from a [`RebuildGraph`] is
147 /// [`RebuildGraph::finalize`]; the trybuild fixture at
148 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
149 /// locks that invariant in.
150 pub struct RebuildGraph {
151 pub(crate) nodes: NodeArena,
152 pub(crate) edges: BidirectionalEdgeStore,
153 pub(crate) strings: StringInterner,
154 pub(crate) files: FileRegistry,
155 pub(crate) indices: AuxiliaryIndices,
156 pub(crate) macro_metadata: NodeMetadataStore,
157 pub(crate) node_provenance: NodeProvenanceStore,
158 pub(crate) edge_provenance: EdgeProvenanceStore,
159 pub(crate) fact_epoch: u64,
160 pub(crate) epoch: u64,
161 pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
162 pub(crate) scope_arena: ScopeArena,
163 pub(crate) alias_table: AliasTable,
164 pub(crate) shadow_table: ShadowTable,
165 pub(crate) scope_provenance_store: ScopeProvenanceStore,
166 pub(crate) file_segments: FileSegmentTable,
167 /// Phase A (U09): C indirect-call resolver side tables.
168 /// `None` outside C workspaces; cloned through the rebuild
169 /// pipeline so an in-flight rebuild preserves the side
170 /// tables already committed on the source graph.
171 pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
172 /// Build-time scratch side-channel for the Go plugin's
173 /// post-Phase-4e `pass_go_method_set_satisfaction` (Cluster
174 /// A of the Go T1 implements-and-promotion design). Mirror
175 /// of `CodeGraph::go_hints`; held by value (no Arc) for
176 /// the same rationale as on `CodeGraph`.
177 pub(crate) go_hints: $crate::graph::unified::build::staging::GoHints,
178 /// Active tombstone set during finalize steps 2–7.
179 /// Populated by `RebuildGraph::remove_file` /
180 /// `RebuildGraph::tombstone` (added by Task 4 Step 4) and
181 /// drained into [`drained_tombstones`] at step 8.
182 pub(crate) tombstones: HashSet<NodeId>,
183 /// Snapshot of [`tombstones`] taken at finalize step 8,
184 /// kept for the debug residue check in step 14.
185 pub(crate) drained_tombstones: HashSet<NodeId>,
186 }
187 };
188
189 // -------------------------------------------------------------
190 // Arm @clone_inner: emit the body of `clone_for_rebuild_inner`.
191 //
192 // The caller passes their `self` binding as `$this:expr` so
193 // macro hygiene preserves the method-local reference. The
194 // destructure is exhaustive — adding a `CodeGraph` field without
195 // also adding it here is a hard `E0027` compile error. Removing
196 // a row here without removing the `CodeGraph` field is an unused-
197 // pattern warning that our `-D warnings` CI setting escalates.
198 // -------------------------------------------------------------
199 (@clone_inner from $this:expr) => {{
200 let CodeGraph {
201 nodes,
202 edges,
203 strings,
204 files,
205 indices,
206 macro_metadata,
207 node_provenance,
208 edge_provenance,
209 fact_epoch,
210 epoch,
211 confidence,
212 scope_arena,
213 alias_table,
214 shadow_table,
215 scope_provenance_store,
216 file_segments,
217 c_indirect_tables,
218 go_hints,
219 // Not mirrored into RebuildGraph: a rebuild reparses source files and
220 // regenerates is_definition fresh, so `finalize` stamps the marker
221 // `true` unconditionally. Destructured to `_` to keep the match
222 // exhaustive (the E0027 guard still fires for any other new field).
223 definition_signal_present: _,
224 // Not mirrored into RebuildGraph: a rebuild reparses source files and
225 // reclassifies imports fresh, so the reassembled graph carries genuine
226 // signal. Destructured to `_` to keep the match exhaustive.
227 import_classification_signal_present: _,
228 } = $this;
229 RebuildGraph {
230 nodes: (**nodes).clone(),
231 edges: (**edges).clone(),
232 strings: (**strings).clone(),
233 files: (**files).clone(),
234 indices: (**indices).clone(),
235 macro_metadata: (**macro_metadata).clone(),
236 node_provenance: (**node_provenance).clone(),
237 edge_provenance: (**edge_provenance).clone(),
238 fact_epoch: *fact_epoch,
239 epoch: *epoch,
240 confidence: confidence.clone(),
241 scope_arena: (**scope_arena).clone(),
242 alias_table: (**alias_table).clone(),
243 shadow_table: (**shadow_table).clone(),
244 scope_provenance_store: (**scope_provenance_store).clone(),
245 file_segments: (**file_segments).clone(),
246 c_indirect_tables: c_indirect_tables.clone(),
247 go_hints: go_hints.clone(),
248 tombstones: ::std::collections::HashSet::new(),
249 drained_tombstones: ::std::collections::HashSet::new(),
250 }
251 }};
252
253 // -------------------------------------------------------------
254 // Arm @field_names: emit a comma-separated list of every field
255 // name. Used by tests that want to assert coverage of field names
256 // against a declared list without re-parsing Rust source.
257 // -------------------------------------------------------------
258 (@field_names) => {
259 &[
260 "nodes",
261 "edges",
262 "strings",
263 "files",
264 "indices",
265 "macro_metadata",
266 "node_provenance",
267 "edge_provenance",
268 "fact_epoch",
269 "epoch",
270 "confidence",
271 "scope_arena",
272 "alias_table",
273 "shadow_table",
274 "scope_provenance_store",
275 "file_segments",
276 "c_indirect_tables",
277 "go_hints",
278 ]
279 };
280}
281
282// =====================================================================
283// RebuildGraph struct declaration (macro-driven)
284// =====================================================================
285
286sqry_graph_fields!(@decl_rebuild);
287
288// =====================================================================
289// clone_for_rebuild: CodeGraph -> RebuildGraph
290// =====================================================================
291
292impl CodeGraph {
293 /// Produce a fresh [`RebuildGraph`] from this graph's current
294 /// committed state, deep-cloning every Arc-wrapped component so the
295 /// returned value is decoupled from future mutations to `self`.
296 ///
297 /// The exhaustive destructure inside the `@clone_inner` arm of
298 /// [`sqry_graph_fields!`] guarantees that every field on
299 /// [`CodeGraph`] is mirrored into the returned [`RebuildGraph`].
300 /// Adding a field to [`CodeGraph`] without also adding it to the
301 /// field list inside `sqry_graph_fields!` is a hard `E0027`
302 /// compile error on the `let CodeGraph { .. } = self;` destructure.
303 ///
304 /// # When to call this
305 ///
306 /// Only the incremental-rebuild dispatcher (Task 4 Step 4) calls
307 /// `clone_for_rebuild`, on the rebuild task's background tokio
308 /// context, against an `Arc<CodeGraph>` freshly obtained via
309 /// `ArcSwap::load_full()`. The Arc's refcount is 1 in the common
310 /// case, so the underlying deep clones amount to `Arc::get_mut`-
311 /// equivalent cost on each component.
312 ///
313 /// # Performance budget (A2 §H, line 734)
314 ///
315 /// On a 384k-node / 1.3M-edge reference graph, this call must
316 /// complete in < 50 ms. The daemon's rebuild-latency benchmark
317 /// tracks the budget; warning threshold 50 ms, hard record
318 /// threshold 200 ms. Exceeding the warning logs but does not fail
319 /// the rebuild.
320 #[must_use]
321 pub fn clone_for_rebuild(&self) -> RebuildGraph {
322 Self::clone_for_rebuild_inner(self)
323 }
324
325 /// Inner implementation, kept separate so the public entrypoint is
326 /// a single-line shim that also makes the exhaustive destructure
327 /// visible at the top of the rebuild surface.
328 ///
329 /// The `@clone_inner from self` macro arm emits the destructure +
330 /// struct-construction inline; passing `self` as an `:expr`
331 /// metavar preserves call-site hygiene so the macro-introduced
332 /// field bindings (`nodes`, `edges`, ...) are resolvable against
333 /// the `self` binding in this method.
334 fn clone_for_rebuild_inner(&self) -> RebuildGraph {
335 sqry_graph_fields!(@clone_inner from self)
336 }
337}
338
339// =====================================================================
340// finalize(): RebuildGraph -> CodeGraph — the 14-step contract
341// =====================================================================
342
343impl RebuildGraph {
344 /// Consume this [`RebuildGraph`] and assemble a publishable
345 /// [`CodeGraph`].
346 ///
347 /// This is the **only** safe path from [`RebuildGraph`] back to
348 /// [`CodeGraph`]. No public API converts a [`RebuildGraph`] into
349 /// an [`Arc<CodeGraph>`] any other way, and no
350 /// `From<RebuildGraph> for CodeGraph` impl exists — the trybuild
351 /// fixture at
352 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
353 /// locks that property in.
354 ///
355 /// # The 14 ordered steps (plan §H lines 658–707)
356 ///
357 /// Each numbered comment below traces the matching plan step. In
358 /// debug / test builds, steps 13 and 14 also execute the §F
359 /// bijection and tombstone-residue checks; release builds compile
360 /// them out.
361 ///
362 /// Steps 2–7 uniformly invoke
363 /// [`super::coverage::NodeIdBearing::retain_nodes`] with the
364 /// closure `|nid| !self.tombstones.contains(&nid)`, so adding a new
365 /// K.A or K.B row for a future `NodeId`-bearing container
366 /// automatically extends compaction coverage once the new row's
367 /// `retain_nodes` call is appended to step 7.
368 ///
369 /// # Errors
370 ///
371 /// Returns `Err(GraphBuilderError::Internal{..})` only if a
372 /// compaction primitive fails; all current primitives are
373 /// infallible, so `finalize()` is currently infallible. The `Result`
374 /// return is preserved for future fallible compaction steps without
375 /// a signature change.
376 pub fn finalize(mut self) -> GraphResult<CodeGraph> {
377 // ----------------------------------------------------------------
378 // Step 1 — Freeze the rebuild's interner.
379 //
380 // The plan §H describes step 1 as `new_strings =
381 // self.string_builder.freeze()`. In this codebase the
382 // rebuild-local interner is a [`StringInterner`] (no separate
383 // "builder" type), so the concrete freeze operation is:
384 //
385 // (a) canonicalise the interner by running
386 // [`StringInterner::build_dedup_table`]. This guarantees:
387 // * `lookup_stale == false` (required for all read-path
388 // accessors); any in-flight bulk-alloc residue from a
389 // prior failed rebuild is healed.
390 // * Every string value is backed by exactly one
391 // canonical slot — no duplicate arcs remain. The
392 // `ref_counts` of duplicate slots are folded into
393 // their canonical slot. `StringId` values held by
394 // live `NodeEntry`s remain valid because this pass
395 // only collapses slots that are *not* referenced from
396 // live nodes (nothing in this pass renames already
397 // live slots).
398 // (b) rewrite every live `NodeEntry` through the remap table
399 // produced by (a) so any node whose fields happen to
400 // point at a now-collapsed duplicate slot is rewired to
401 // the canonical slot. This preserves the post-freeze
402 // invariant "every live `NodeEntry` references only
403 // canonical `StringId`s" even if earlier rebuild steps
404 // produced duplicate slots.
405 // (c) prune unreferenced slots with
406 // [`StringInterner::recycle_unreferenced`] so the frozen
407 // interner carries only strings the final graph actually
408 // needs. This is the step that drives
409 // `interner_live_ratio < interner_compaction_threshold`
410 // down over time (plan §H line 713) so the next
411 // housekeeping rebuild observes an accurate live ratio.
412 //
413 // After this block runs, `self.strings` is in a fully
414 // canonical, shared-immutable-ready state. Step 12's
415 // `Arc::new(new_strings)` is then purely the ownership-to-
416 // shared transition; the semantic freeze itself has already
417 // happened here.
418 // ----------------------------------------------------------------
419 let string_remap = self.strings.build_dedup_table();
420 if !string_remap.is_empty() {
421 // Rewrite every live node's interned-string fields through
422 // the remap so freshly-collapsed duplicate slots are not
423 // reachable from any live `NodeEntry`. This is required
424 // for (c) below to be safe to call: `recycle_unreferenced`
425 // only frees slots with `ref_count == 0`, so the refcount
426 // bookkeeping that `build_dedup_table` consolidated into
427 // the canonical slot must be honoured by node-level fields
428 // as well.
429 rewrite_node_entries_through_remap(&mut self.nodes, &string_remap);
430 // Iter-2 B1 (verbatim): `AuxiliaryIndices::name_index` /
431 // `qualified_name_index` are keyed by `StringId`. If
432 // `build_dedup_table()` collapses a key's slot and
433 // `recycle_unreferenced` subsequently frees it, the bucket
434 // keys would dangle. Remap every `StringId`-backed holder
435 // on the rebuild through the same dedup table before the
436 // recycle pass runs, merging any buckets that collapse onto
437 // the same canonical key.
438 //
439 // All remap-capable stores are listed here exhaustively.
440 // Extending this block when a new `CodeGraph` field gains a
441 // `StringId` payload is a code-owner obligation tied to the
442 // `sqry_graph_fields!` macro (plan §H "Placement and feature
443 // gate"). Today, the StringId-bearing surfaces on the
444 // publish-visible graph are:
445 //
446 // * `AuxiliaryIndices.name_index` / `qualified_name_index`
447 // (BTreeMap keys — collapse on canonicalisation)
448 // * `FileRegistry` — `FileEntry.source_uri: Option<StringId>`
449 // per slot
450 // * `AliasTable` — `AliasEntry.from_symbol` /
451 // `to_symbol` per entry; the `(scope, from_symbol)`
452 // sort key is re-established after rewrite
453 // * `ShadowTable` — `ShadowEntry.symbol` per entry; the
454 // `(scope, symbol, byte_offset)` sort key and the
455 // `chains` range index are rebuilt after rewrite
456 //
457 // `NodeArena` (above) is the fifth surface; it is handled
458 // inline because it is the single largest consumer and
459 // already has a purpose-built helper.
460 //
461 // Iter-3 B1 (verbatim): `BidirectionalEdgeStore` holds
462 // `EdgeKind` instances whose variants — `Imports{alias}`,
463 // `Exports{alias}`, `TypeOf{name}`, `TraitMethodBinding
464 // {trait_name, impl_type}`, `HttpRequest{url}`,
465 // `GrpcCall{service, method}`, `DbQuery{table}`,
466 // `TableRead{table_name, schema}`, `TableWrite{table_name,
467 // schema}`, `TriggeredBy{trigger_name, schema}`,
468 // `MessageQueue{protocol::Other(_), topic}`,
469 // `WebSocket{event}`, `GraphQLOperation{operation}`,
470 // `ProcessExec{command}`, `FileIpc{path_pattern}`,
471 // `ProtocolCall{protocol, metadata}` — carry
472 // `StringId` payloads inside both the steady-state CSR
473 // (`CsrGraph::edge_kind`) and the mutable delta
474 // (`DeltaBuffer` `DeltaEdge::kind`), in **both** the
475 // forward and reverse stores. Before this iter-4 fix,
476 // step 1 left those payloads un-remapped — so after
477 // `recycle_unreferenced` below, those edges would have
478 // dangled onto freed interner slots. The full-build
479 // pipeline recognises this surface explicitly at
480 // `build/entrypoint.rs` (Phase 4b) +
481 // `build/parallel_commit.rs::phase4_apply_global_remap`;
482 // the rebuild pipeline must do the same against
483 // committed storage because, unlike Phase 4b, the edges
484 // are no longer a `Vec<Vec<PendingEdge>>`.
485 //
486 // `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
487 // is the sixth surface on this list. The exhaustive match
488 // on `EdgeKind` lives in
489 // `parallel_commit::remap_edge_kind_string_ids` so adding a
490 // new `StringId`-bearing variant becomes a single source of
491 // truth: a compile error there forces both pipelines to
492 // rewrite it.
493 //
494 // Stores whose data is keyed by `NodeId`/`FileId`/`EdgeId`
495 // only (metadata keyed on NodeId, node / edge provenance,
496 // scope arena indexed by ScopeId, scope provenance,
497 // file-segments keyed on FileId/EdgeId) do not hold
498 // `StringId` payloads and are out of scope for this
499 // rewrite. Any future field that lifts a `StringId` onto
500 // `CodeGraph` must extend this list in the same commit
501 // that declares it, matching the A2 §K discipline.
502 self.indices.rewrite_string_ids_through_remap(&string_remap);
503 self.files.rewrite_string_ids_through_remap(&string_remap);
504 self.alias_table
505 .rewrite_string_ids_through_remap(&string_remap);
506 self.shadow_table
507 .rewrite_string_ids_through_remap(&string_remap);
508 // Iter-4 K.B1 fix: rewrite `StringId` payloads carried by
509 // committed `EdgeKind`s in both forward and reverse stores,
510 // across both CSR and delta tiers. Must run before
511 // `recycle_unreferenced` below so freed slots stay unreferenced.
512 self.edges
513 .rewrite_edge_kind_string_ids_through_remap(&string_remap);
514 }
515 self.strings.recycle_unreferenced();
516
517 // ----------------------------------------------------------------
518 // Step 2 — Compact NodeArena.
519 //
520 // K.A1 row: `NodeArena::retain_nodes` drops every occupied slot
521 // whose NodeId fails the predicate, advancing the slot's
522 // generation so lingering NodeId handles become stale.
523 // ----------------------------------------------------------------
524 {
525 let tombstones = &self.tombstones;
526 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
527 Box::new(move |nid| !tombstones.contains(&nid));
528 self.nodes.retain_nodes(&*predicate);
529 }
530
531 // ----------------------------------------------------------------
532 // Step 3 — Compact BidirectionalEdgeStore (forward + reverse).
533 //
534 // K.A2 + K.A3 rows: `retain_nodes` drops every delta edge with
535 // a tombstoned endpoint in either direction. CSR compaction is
536 // deferred to step 9 (CSR is derived state, rebuilt not mutated).
537 // ----------------------------------------------------------------
538 {
539 let tombstones = &self.tombstones;
540 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
541 Box::new(move |nid| !tombstones.contains(&nid));
542 self.edges.retain_nodes(&*predicate);
543 }
544
545 // ----------------------------------------------------------------
546 // Step 4 — Compact AuxiliaryIndices.
547 //
548 // K.A4–K.A7 rows (kind / name / qualified-name / file indices):
549 // a single `AuxiliaryIndices::retain_nodes` call visits every
550 // inner bucket.
551 // ----------------------------------------------------------------
552 {
553 let tombstones = &self.tombstones;
554 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
555 Box::new(move |nid| !tombstones.contains(&nid));
556 self.indices.retain_nodes(&*predicate);
557 }
558
559 // ----------------------------------------------------------------
560 // Step 5 — Compact NodeMetadataStore.
561 //
562 // K.A8 row: macro / classpath per-node metadata keyed by the
563 // full `(index, generation)` pair.
564 // ----------------------------------------------------------------
565 {
566 let tombstones = &self.tombstones;
567 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
568 Box::new(move |nid| !tombstones.contains(&nid));
569 self.macro_metadata.retain_nodes(&*predicate);
570 }
571
572 // ----------------------------------------------------------------
573 // Step 6 — Compact FileRegistry per-file buckets.
574 //
575 // K.B1 row. Iter-2 B2 (verbatim): this step used to be a
576 // no-op scheduled against a future base-plan Step 1. Pulling
577 // `per_file_nodes: HashMap<FileId, Vec<NodeId>>` forward into
578 // Gate 0c retires the no-op — every `NodeId` the rebuild's
579 // parallel-parse pass committed is already bucketed via
580 // `FileRegistry::record_node`, so this step now does real work:
581 //
582 // * drop every `NodeId` whose arena slot was tombstoned in
583 // step 2 (or earlier) via the shared predicate
584 // * dedup each surviving bucket
585 // * drop buckets that collapse to empty
586 //
587 // The §F.1 bucket-bijection check at step 13 consumes the
588 // result of this compaction, so a mis-applied predicate here
589 // surfaces as a panic at the publish boundary in debug builds.
590 // ----------------------------------------------------------------
591 {
592 let tombstones = &self.tombstones;
593 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
594 Box::new(move |nid| !tombstones.contains(&nid));
595 self.files.retain_nodes(&*predicate);
596 }
597
598 // ----------------------------------------------------------------
599 // Step 7 — Compact K-list extras.
600 //
601 // Rows K.A10 (node_provenance), K.A11 (scope_arena),
602 // K.A12 (alias_table), K.A13 (shadow_table). Each is a direct
603 // NodeIdBearing::retain_nodes call.
604 //
605 // When a future task lifts a new NodeId-bearing structure onto
606 // `CodeGraph`, extend this step with one additional
607 // `retain_nodes` call and add a K.A/K.B row to
608 // `super::coverage` per the plan §K contract.
609 // ----------------------------------------------------------------
610 {
611 let tombstones = &self.tombstones;
612 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
613 Box::new(move |nid| !tombstones.contains(&nid));
614 self.node_provenance.retain_nodes(&*predicate);
615 self.scope_arena.retain_nodes(&*predicate);
616 self.alias_table.retain_nodes(&*predicate);
617 self.shadow_table.retain_nodes(&*predicate);
618 }
619
620 // ----------------------------------------------------------------
621 // Step 8 — Drain tombstones into `drained_tombstones`.
622 //
623 // The residue check at step 14 asserts that the finalized
624 // `CodeGraph` does not contain any of these NodeIds in any
625 // NodeId-bearing structure. Keep the drained set until the
626 // check completes; then it is dropped with `self`.
627 // ----------------------------------------------------------------
628 self.drained_tombstones = std::mem::take(&mut self.tombstones);
629
630 // ----------------------------------------------------------------
631 // Step 9 — Rebuild CSR adjacency (both directions).
632 //
633 // CSR is derived state (K.A9 — "CSR adjacency is derived state;
634 // rebuilt from compacted edges — never mutated in place"). After
635 // step 3 filtered the delta tier to live-only endpoints, the
636 // correct operation is to *construct* a fresh CSR from the
637 // compacted delta snapshot and *install* it — not merely drop
638 // the stale cache. This matches plan §H lines 684–691 (the
639 // committed-then-queried graph has a CSR from the first read).
640 //
641 // Build path: `compaction::snapshot_edges` → compaction merges
642 // in-memory delta (no tombstones left after step 3) →
643 // `build_compacted_csr` constructs an immutable `CsrGraph` →
644 // `swap_csrs_and_clear_deltas` installs both directions and
645 // clears the now-absorbed delta buffers. The node_count is the
646 // post-compaction slot count of the arena (not the live count —
647 // CSR uses dense slot indexing; vacant slots appear as nodes
648 // with zero out-edges).
649 //
650 // The plan's example spells this `self.edges.rebuild_csr()` as
651 // shorthand for this sequence; we expand it to concrete calls
652 // so reviewers can audit each primitive. The post-condition is
653 // identical to the plan's: after step 9 the `CodeGraph` about
654 // to be assembled has no CSR referencing a tombstoned NodeId,
655 // because the CSR was built from the already-filtered delta.
656 // ----------------------------------------------------------------
657 {
658 use crate::graph::unified::compaction::{
659 Direction, build_compacted_csr, snapshot_edges,
660 };
661 let node_count = self.nodes.slot_count();
662 // Snapshot the forward/reverse deltas (CSR is still stale
663 // at this point — `build_compacted_csr` uses the delta's
664 // live-only contents). Because step 3 tombstoned endpoints
665 // out of the delta already, the merged build sees only
666 // live edges.
667 let forward_snapshot = {
668 let forward = self.edges.forward();
669 snapshot_edges(&forward, node_count)
670 };
671 let reverse_snapshot = {
672 let reverse = self.edges.reverse();
673 snapshot_edges(&reverse, node_count)
674 };
675 // Build both directions in parallel (matches the pattern
676 // used by `build/entrypoint.rs`).
677 let (forward_csr, reverse_csr) = rayon::join(
678 || build_compacted_csr(&forward_snapshot, Direction::Forward),
679 || build_compacted_csr(&reverse_snapshot, Direction::Reverse),
680 );
681 let (forward_csr, _) =
682 forward_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
683 reason: format!("rebuild finalize step 9 (forward CSR build): {e}"),
684 })?;
685 let (reverse_csr, _) =
686 reverse_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
687 reason: format!("rebuild finalize step 9 (reverse CSR build): {e}"),
688 })?;
689 // Install both CSRs and clear the absorbed deltas.
690 self.edges
691 .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
692 }
693
694 // ----------------------------------------------------------------
695 // Step 10 — Per-language confidence update.
696 //
697 // Incremental rebuild must ensure the published confidence map
698 // matches the set of languages that actually have live nodes in
699 // the rebuild's graph. Languages whose only source files were
700 // all removed during this rebuild must not linger in the
701 // confidence surface — they would mislead MCP clients into
702 // believing analysis for that language is still available.
703 //
704 // The rebuild-local `FileRegistry` carries per-file `Language`
705 // tags; we enumerate them, collect the set of languages with at
706 // least one live file, and drop confidence entries for any
707 // language that no longer appears. Entries for still-present
708 // languages are preserved as-is (their `ConfidenceMetadata` is
709 // updated by the plugin-level confidence-ingestion path in
710 // `CodeGraph::merge_confidence`; this step only filters, it
711 // does not fabricate new samples).
712 //
713 // A language entry is preserved when (a) at least one live file
714 // is tagged with that language in the rebuild-local registry,
715 // OR (b) the confidence entry carries a non-default set of
716 // limitations / unavailable-features — that is analysis
717 // metadata the plugin layer deliberately recorded, and dropping
718 // it on a zero-file rebuild would be lossy. Both conditions are
719 // evaluated against the rebuild-local state that will be
720 // published by step 12.
721 // ----------------------------------------------------------------
722 {
723 use std::collections::BTreeSet;
724 let mut active_languages: BTreeSet<String> = BTreeSet::new();
725 for (_file_id, _path, maybe_language) in self.files.iter_with_language() {
726 if let Some(language) = maybe_language {
727 active_languages.insert(language.to_string());
728 }
729 }
730 self.confidence.retain(|language_key, meta| {
731 if active_languages.contains(language_key) {
732 true
733 } else {
734 // Preserve entries that encode deliberate
735 // plugin-recorded limitations even when no live
736 // files remain — those are analysis-state facts
737 // the daemon must not silently drop.
738 !meta.limitations.is_empty() || !meta.unavailable_features.is_empty()
739 }
740 });
741 }
742
743 // ----------------------------------------------------------------
744 // Step 11 — Epoch bump.
745 //
746 // `prior_epoch` captured at `clone_for_rebuild` time; +1 marks
747 // the publication boundary. Wrapping add matches the existing
748 // `CodeGraph::bump_epoch` behavior at
749 // `sqry-core/src/graph/unified/concurrent/graph.rs`.
750 // ----------------------------------------------------------------
751 let new_epoch = self.epoch.wrapping_add(1);
752
753 // ----------------------------------------------------------------
754 // Step 12 — Assemble the immutable `CodeGraph`.
755 //
756 // Every Arc-wrapped field is freshly wrapped from the
757 // rebuild-local owned value, so the new `CodeGraph` has no
758 // Arc-sharing relationship with the pre-rebuild snapshot. This
759 // is the point at which the rebuild-local interner becomes
760 // immutable (step 1's freeze).
761 // ----------------------------------------------------------------
762 let mut graph = CodeGraph::__assemble_from_rebuild_parts_internal(
763 self.nodes,
764 self.edges,
765 self.strings,
766 self.files,
767 self.indices,
768 self.macro_metadata,
769 self.node_provenance,
770 self.edge_provenance,
771 self.fact_epoch,
772 new_epoch,
773 self.confidence,
774 self.scope_arena,
775 self.alias_table,
776 self.shadow_table,
777 self.scope_provenance_store,
778 self.file_segments,
779 self.go_hints,
780 );
781 // Phase A (U09): the assembled `CodeGraph` resets
782 // `c_indirect_tables` to `None`. Re-install the rebuild's owned
783 // value so an in-flight rebuild preserves side tables already
784 // committed on the source graph.
785 graph.set_c_indirect_tables(self.c_indirect_tables);
786
787 // ----------------------------------------------------------------
788 // Steps 13 + 14 — (debug) Publish-boundary invariants.
789 //
790 // Per plan §F.3 "single source of truth", both the §F.1 bucket
791 // bijection and the §F.2 tombstone-residue checks fire through
792 // the canonical `publish::assert_publish_invariants` helper.
793 // That helper is also called at the full-rebuild end
794 // (`build_unified_graph_inner`), by Task 6's
795 // `WorkspaceManager::publish_graph`, and by every §E harness
796 // iteration — so any invariant drift surfaces in all four
797 // places simultaneously, not in a subset.
798 //
799 // The §F.2 assertion is the **single site** against
800 // `self.drained_tombstones` (populated at step 8); we do not
801 // run the residue check anywhere else.
802 // ----------------------------------------------------------------
803 #[cfg(any(debug_assertions, test))]
804 crate::graph::unified::publish::assert_publish_invariants(&graph, &self.drained_tombstones);
805
806 Ok(graph)
807 }
808
809 /// Returns the number of `NodeIds` currently staged for tombstoning
810 /// via [`RebuildGraph::tombstone`] (to be added by Task 4 Step 4).
811 /// Useful for Gate 0c tests that exercise finalize with an empty
812 /// or non-empty tombstone set.
813 #[must_use]
814 pub fn pending_tombstone_count(&self) -> usize {
815 self.tombstones.len()
816 }
817
818 /// Shared-reference accessor for the rebuild-local
819 /// [`NodeArena`]. Read-only; the writer is the rebuild dispatcher
820 /// which holds `&mut self`.
821 ///
822 /// Used by sqry-daemon's `WorkspaceManager` (Task 6) and by the
823 /// Task 4 Step 4 scale test to inspect arena state between
824 /// [`remove_file`](Self::remove_file) calls without going through
825 /// the pub(crate) field directly.
826 #[must_use]
827 pub fn nodes(&self) -> &NodeArena {
828 &self.nodes
829 }
830
831 /// Shared-reference accessor for the rebuild-local
832 /// [`FileRegistry`]. Read-only. The rebuild dispatcher writes via
833 /// `&mut self` on [`remove_file`](Self::remove_file) and
834 /// finalization.
835 #[must_use]
836 pub fn files(&self) -> &FileRegistry {
837 &self.files
838 }
839
840 /// Shared-reference accessor for the rebuild-local
841 /// [`FileSegmentTable`]. Read-only.
842 ///
843 /// `FileSegmentTable` is a plain `Vec<Option<FileSegment>>` with no
844 /// interior mutability (see
845 /// `sqry-core/src/graph/unified/storage/segment.rs`), so this
846 /// accessor is genuinely read-only. Writers ride through the
847 /// `&mut self` mutation path inside
848 /// [`remove_file`](Self::remove_file) and
849 /// [`finalize`](Self::finalize).
850 ///
851 /// Added as part of the iter-1 Codex review fix for Task 4
852 /// Steps 2-3: the `remove_file` integration tests need to assert
853 /// that a file's segment entry is cleared after removal, closing
854 /// the FileId-recycle stale-range bug documented at
855 /// `sqry-core/tests/incremental_remove_file_scale.rs`.
856 #[must_use]
857 pub fn file_segments(&self) -> &FileSegmentTable {
858 &self.file_segments
859 }
860
861 // Note (iter-1 review fix): a `pub fn edges(&self) -> &BidirectionalEdgeStore`
862 // accessor was removed. `BidirectionalEdgeStore` exposes
863 // `forward_mut(&self)` and `reverse_mut(&self)` (interior mutability),
864 // so returning `&BidirectionalEdgeStore` from `&self` would let
865 // external callers escalate a "read-only" handle into a writer. The
866 // rebuild contract requires that edge mutation only happens inside
867 // `RebuildGraph::remove_file` / `finalize` on `&mut self`. If a
868 // read-only edge-view accessor is ever needed by a legitimate
869 // consumer, add a wrapper struct (e.g., `BidirectionalEdgeStoreView`)
870 // that exposes only `forward()` / `reverse()` / `edges_from` /
871 // `edges_to` / `stats` on `&self` — never the `*_mut` methods.
872
873 /// Stage a `NodeId` for tombstoning during the next `finalize` pass.
874 ///
875 /// Gate 0c ships this helper so finalize tests can drive the
876 /// 14-step contract with a realistic tombstone set without waiting
877 /// for Task 4 Step 4's `remove_file` plumbing. Production callers
878 /// will route through `RebuildGraph::remove_file` (Task 4 Step 4),
879 /// which internally populates the same set via
880 /// `FileRegistry::take_nodes`.
881 pub fn tombstone(&mut self, id: NodeId) {
882 self.tombstones.insert(id);
883 }
884
885 /// Stage every `NodeId` in `ids` for tombstoning during the next
886 /// `finalize` pass. Equivalent to calling [`tombstone`](Self::tombstone)
887 /// for each id, but expresses the bulk intent at the call site.
888 ///
889 /// Used by [`remove_file`](Self::remove_file) to fold every
890 /// file-local `NodeId` into the staged set in a single pass.
891 pub(crate) fn tombstone_many<I: IntoIterator<Item = NodeId>>(&mut self, ids: I) {
892 self.tombstones.extend(ids);
893 }
894
895 /// Drain every `NodeId` belonging to `file_id`, invalidate every
896 /// rebuild-local edge whose source or target is one of those nodes
897 /// (across both forward and reverse CSR + delta tiers of the
898 /// rebuild-local edge store), drop the file's entry from the
899 /// rebuild-local [`FileRegistry`], and return the list of
900 /// tombstoned [`NodeId`]s.
901 ///
902 /// This is the rebuild-side mirror of
903 /// [`super::super::concurrent::CodeGraph::remove_file`]. Whereas
904 /// the `CodeGraph` variant mutates a live publishable graph in
905 /// place (O(1) publish once the caller wraps it in an `Arc`), this
906 /// variant operates on the owned, pre-finalize
907 /// [`RebuildGraph`] state — so the edge-store tombstones land in
908 /// the CSR's tombstone bitmap and the delta buffer, and are later
909 /// physically purged by [`finalize`](Self::finalize) step 9 when
910 /// the CSR is rebuilt from the compacted delta (§H lines 684–691).
911 ///
912 /// The method does **not** rewrite `NodeIdBearing` surfaces on the
913 /// rebuild (`NodeArena`, `AuxiliaryIndices`, `NodeMetadataStore`,
914 /// `NodeProvenanceStore`, `ScopeArena`, `AliasTable`, `ShadowTable`) —
915 /// those surfaces are compacted once, uniformly, by `finalize()`
916 /// steps 2–7 against the accumulated `tombstones` set. Running a
917 /// partial compaction here would duplicate work and violate the
918 /// plan's "compact once at step N" contract.
919 ///
920 /// Instead, the tombstoned `NodeId`s are accumulated in
921 /// `self.tombstones` via [`tombstone_many`](Self::tombstone_many).
922 /// Successive `remove_file` calls against different files
923 /// accumulate into the same set; `finalize()` then sweeps every
924 /// K.A/K.B surface once with the union predicate.
925 ///
926 /// # What is tombstoned immediately
927 ///
928 /// Only two surfaces are mutated before `finalize()` runs:
929 ///
930 /// * **`NodeArena`**: each file-local `NodeId` is `remove`d so the
931 /// slot's generation advances and stale handles cannot alias a
932 /// re-allocation. Downstream compaction at step 2 is then a
933 /// no-op for these slots (idempotent; the arena skips stale
934 /// generations).
935 /// * **Edge store** (both forward + reverse, both CSR + delta):
936 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`] kills
937 /// every edge whose source or target is one of the drained
938 /// `NodeIds`. CSR tombstones land in `csr_tombstones`; delta
939 /// edges are dropped outright. Step 9 of `finalize` then
940 /// rebuilds a fresh CSR from the tombstone-free delta, so the
941 /// CSR tombstones become physically invisible by publish time.
942 /// * **`FileRegistry`**: the bucket is drained via
943 /// [`FileRegistry::take_nodes`] and the file entry is
944 /// deregistered via [`FileRegistry::unregister`]. Both are
945 /// idempotent on an already-removed file.
946 ///
947 /// # Returned value
948 ///
949 /// The list of `NodeIds` that were staged for tombstoning. Empty on
950 /// an unknown or already-removed file. Useful for Gate 0c finalize
951 /// tests that need to assert on bucket-drain correctness or on the
952 /// union of staged tombstones across several `remove_file` calls.
953 ///
954 /// # Idempotency
955 ///
956 /// Calling twice with the same `file_id` is a no-op on the second
957 /// call. The bucket is already drained, `take_nodes` returns an
958 /// empty `Vec`, the rest of the method short-circuits, and the
959 /// `tombstones` set is unchanged.
960 // Live in the default build: the intra-crate consumer is
961 // `remove_closure_from_rebuild`, reached from the ungated public
962 // `build::incremental::incremental_rebuild` path; the external
963 // consumer is sqry-daemon's `WorkspaceManager` via the feature-gated
964 // `pub use` of `RebuildGraph`. The visibility is `pub` (not
965 // `pub(crate)`) so the daemon can drive file removals through the
966 // rebuild plane, which is the canonical path for file-deletion
967 // events per §F.2.
968 pub fn remove_file(&mut self, file_id: super::super::file::FileId) -> Vec<NodeId> {
969 // Drain the rebuild-local FileRegistry bucket.
970 let tombstoned: Vec<NodeId> = self.files.take_nodes(file_id);
971 // Deregister the file entry unconditionally (idempotent).
972 self.files.unregister(file_id);
973 // Clear the rebuild-local `FileSegmentTable` entry for this
974 // file (idempotent — `remove` no-ops on unknown ids). This
975 // matches `CodeGraph::remove_file`'s behaviour: finalize()
976 // publishes `self.file_segments` verbatim at step 12, so any
977 // stale entry leaked here would survive into the assembled
978 // `CodeGraph`. Because `FileRegistry::unregister` recycles
979 // `FileId` slots, a leaked segment would later alias a
980 // different file's node range after the slot is reissued —
981 // which would cause `reindex_files` to tombstone the wrong
982 // range. Clearing the segment at remove time is the only
983 // defence that closes both the "finalize publishes stale
984 // segment" path AND the "FileId recycle attaches stale
985 // range" path.
986 self.file_segments.remove(file_id);
987
988 if tombstoned.is_empty() {
989 return tombstoned;
990 }
991
992 let dead: std::collections::HashSet<NodeId> = tombstoned.iter().copied().collect();
993
994 // 1. Tombstone each arena slot. `NodeArena::remove` is
995 // idempotent against stale generations, so retriggering a
996 // file removal after finalize is safe.
997 for &nid in &tombstoned {
998 let _ = self.nodes.remove(nid);
999 }
1000
1001 // 2. Invalidate edges across both CSR + delta in both
1002 // directions on the rebuild-local edge store.
1003 self.edges.tombstone_edges_for_nodes(&dead);
1004
1005 // 3. Stage the drained NodeIds for the finalize-time K.A/K.B
1006 // compaction sweep. finalize() will consume these at steps
1007 // 2–7 through the NodeIdBearing::retain_nodes predicate.
1008 self.tombstone_many(tombstoned.iter().copied());
1009
1010 tombstoned
1011 }
1012
1013 /// Returns the rebuild-local epoch captured at `clone_for_rebuild`.
1014 #[must_use]
1015 pub fn prior_epoch(&self) -> u64 {
1016 self.epoch
1017 }
1018
1019 /// Pre-finalize tombstone-residue check on the rebuild-state
1020 /// structures themselves (plan §F.2 literal named API).
1021 ///
1022 /// Iterates every `NodeIdBearing` field on this `RebuildGraph` and
1023 /// panics if any contains a `NodeId` present in `self.tombstones`.
1024 /// This is a diagnostic helper for mid-rebuild consistency checks:
1025 /// e.g., Task 4 Step 4's `RebuildGraph::remove_file` can call this
1026 /// after tombstoning a file but before more closure files land, to
1027 /// prove the just-tombstoned `NodeIds` really left every index before
1028 /// the next pass commits. The `RebuildGraph::finalize` flow then
1029 /// re-asserts against the drained set on the *assembled* `CodeGraph`
1030 /// at step 14 — those are two different snapshots of the same
1031 /// invariant; both must hold.
1032 ///
1033 /// No-op when `self.tombstones` is empty or in release builds.
1034 ///
1035 /// Does **not** replace the step-14 call site; the single source of
1036 /// truth for the publish-boundary residue check remains
1037 /// `crate::graph::unified::publish::assert_publish_invariants`. This
1038 /// helper exists so pre-finalize call sites (Task 4 Step 4's
1039 /// post-remove sanity check, incremental-engine debug probes, Gate
1040 /// 0d negative tests) can name the assertion without rolling their
1041 /// own iteration.
1042 ///
1043 /// # Panics
1044 ///
1045 /// Panics in debug/test builds when any node-bearing rebuild surface
1046 /// still references a node recorded in `self.tombstones`. That indicates
1047 /// the rebuild plane removed a file without fully compacting every
1048 /// dependent arena, edge, and auxiliary index before the next pass.
1049 #[cfg(any(debug_assertions, test))]
1050 pub fn assert_no_tombstone_residue(&self) {
1051 use super::coverage::NodeIdBearing;
1052 let dead = &self.tombstones;
1053 if dead.is_empty() {
1054 return;
1055 }
1056 for nid in self.nodes.all_node_ids() {
1057 assert!(
1058 !dead.contains(&nid),
1059 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1060 );
1061 }
1062 for nid in self.edges.all_node_ids() {
1063 assert!(
1064 !dead.contains(&nid),
1065 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1066 );
1067 }
1068 for nid in self.indices.all_node_ids() {
1069 assert!(
1070 !dead.contains(&nid),
1071 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1072 );
1073 }
1074 for nid in self.macro_metadata.all_node_ids() {
1075 assert!(
1076 !dead.contains(&nid),
1077 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1078 );
1079 }
1080 for nid in self.node_provenance.all_node_ids() {
1081 assert!(
1082 !dead.contains(&nid),
1083 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1084 );
1085 }
1086 for nid in self.scope_arena.all_node_ids() {
1087 assert!(
1088 !dead.contains(&nid),
1089 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1090 );
1091 }
1092 for nid in self.alias_table.all_node_ids() {
1093 assert!(
1094 !dead.contains(&nid),
1095 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1096 );
1097 }
1098 for nid in self.shadow_table.all_node_ids() {
1099 assert!(
1100 !dead.contains(&nid),
1101 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1102 );
1103 }
1104 for nid in self.files.all_node_ids() {
1105 assert!(
1106 !dead.contains(&nid),
1107 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1108 );
1109 }
1110 }
1111}
1112
1113// =====================================================================
1114// Step 1 helper: rewrite every live `NodeEntry`'s interned-string
1115// fields through the dedup-remap table produced by
1116// `StringInterner::build_dedup_table()`.
1117// =====================================================================
1118
1119/// Rewrite every `NodeEntry` field that holds a [`crate::graph::unified::string::id::StringId`]
1120/// through `remap` so no live node points at a collapsed-duplicate
1121/// slot. Called from finalize step 1 after the interner has been
1122/// canonicalised; a no-op when `remap` is empty (the common case of a
1123/// no-op rebuild).
1124///
1125/// Fields rewritten: `name`, `signature`, `doc`, `qualified_name`,
1126/// `visibility`. These are every `StringId` / `Option<StringId>` field
1127/// that `NodeEntry` currently declares. Adding a new `StringId`-valued
1128/// field to `NodeEntry` requires extending this helper — the closest
1129/// enforcement is the compile-fail coverage in the `sqry_graph_fields!`
1130/// macro which forces a new `RebuildGraph` field and a matching
1131/// finalize step for any new `CodeGraph` field; individual arena
1132/// fields are caught at review time against this helper.
1133fn rewrite_node_entries_through_remap(
1134 nodes: &mut NodeArena,
1135 remap: &HashMap<
1136 crate::graph::unified::string::id::StringId,
1137 crate::graph::unified::string::id::StringId,
1138 >,
1139) {
1140 if remap.is_empty() {
1141 return;
1142 }
1143 for (_id, entry) in nodes.iter_mut() {
1144 if let Some(&canon) = remap.get(&entry.name) {
1145 entry.name = canon;
1146 }
1147 if let Some(sid) = entry.signature
1148 && let Some(&canon) = remap.get(&sid)
1149 {
1150 entry.signature = Some(canon);
1151 }
1152 if let Some(sid) = entry.doc
1153 && let Some(&canon) = remap.get(&sid)
1154 {
1155 entry.doc = Some(canon);
1156 }
1157 if let Some(sid) = entry.qualified_name
1158 && let Some(&canon) = remap.get(&sid)
1159 {
1160 entry.qualified_name = Some(canon);
1161 }
1162 if let Some(sid) = entry.visibility
1163 && let Some(&canon) = remap.get(&sid)
1164 {
1165 entry.visibility = Some(canon);
1166 }
1167 }
1168}
1169
1170// =====================================================================
1171// Assembly path notes (A2 §H "Type-enforced publish path")
1172// =====================================================================
1173//
1174// The only route from `RebuildGraph` to `Arc<CodeGraph>` is:
1175//
1176// let code_graph = rebuild.finalize()?; // Step 12 assembles CodeGraph
1177// let arc = Arc::new(code_graph); // caller wraps (publish site)
1178//
1179// The assembly inside `finalize` uses
1180// `CodeGraph::__assemble_from_rebuild_parts_internal`, which is
1181// `pub(crate)` on `CodeGraph`. That means downstream crates — even
1182// `sqry-daemon` with `rebuild-internals` enabled — cannot call the
1183// assembler directly; they must route through `finalize()`, which
1184// guarantees the 14-step compaction sequence runs first.
1185//
1186// The trybuild fixture
1187// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1188// exercises this: a downstream crate attempting to call
1189// `__assemble_from_rebuild_parts_internal` fails with E0603 ("function
1190// is private"), and attempting to `impl From<RebuildGraph> for CodeGraph`
1191// from outside this module fails with E0117 (orphan-rule violation).
1192
1193// =====================================================================
1194// Tests
1195// =====================================================================
1196
1197#[cfg(test)]
1198mod tests {
1199 use super::*;
1200 use crate::graph::unified::file::FileId;
1201 use crate::graph::unified::node::NodeKind;
1202 use crate::graph::unified::storage::arena::NodeEntry;
1203 use crate::graph::unified::storage::metadata::MacroNodeMetadata;
1204
1205 /// Seed a `CodeGraph` with a handful of live nodes distributed over
1206 /// two files so the per-field compaction steps have something
1207 /// non-trivial to operate on.
1208 fn seeded_graph() -> (CodeGraph, NodeId, NodeId, NodeId) {
1209 let mut graph = CodeGraph::new();
1210 let file_a = FileId::new(1);
1211 let file_b = FileId::new(2);
1212 let sym = graph.strings_mut().intern("symbol_a").expect("intern a");
1213 let node_a;
1214 let node_b;
1215 let node_c;
1216 {
1217 let arena = graph.nodes_mut();
1218 node_a = arena
1219 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
1220 .expect("alloc a");
1221 node_b = arena
1222 .alloc(NodeEntry::new(NodeKind::Method, sym, file_a))
1223 .expect("alloc b");
1224 node_c = arena
1225 .alloc(NodeEntry::new(NodeKind::Struct, sym, file_b))
1226 .expect("alloc c");
1227 }
1228 graph
1229 .indices_mut()
1230 .add(node_a, NodeKind::Function, sym, Some(sym), file_a);
1231 graph
1232 .indices_mut()
1233 .add(node_b, NodeKind::Method, sym, Some(sym), file_a);
1234 graph
1235 .indices_mut()
1236 .add(node_c, NodeKind::Struct, sym, Some(sym), file_b);
1237 graph
1238 .macro_metadata_mut()
1239 .insert(node_a, MacroNodeMetadata::default());
1240 (graph, node_a, node_b, node_c)
1241 }
1242
1243 #[test]
1244 fn clone_for_rebuild_copies_every_field_without_arc_sharing() {
1245 let (graph, _a, _b, _c) = seeded_graph();
1246 let rebuild = graph.clone_for_rebuild();
1247
1248 // Field-level counts match the source graph.
1249 assert_eq!(rebuild.nodes.len(), graph.nodes().len());
1250 assert_eq!(rebuild.macro_metadata.len(), graph.macro_metadata().len());
1251 // The rebuild owns its own arena (not an Arc share), so
1252 // mutating it in place would not affect the source graph. We
1253 // verify this by tombstoning a node in the rebuild and
1254 // confirming the source is unchanged.
1255 let mut rebuild = rebuild;
1256 let ids: Vec<NodeId> = rebuild.nodes.all_node_ids().collect();
1257 let victim = *ids.first().expect("at least one node");
1258 rebuild.tombstone(victim);
1259 assert_eq!(rebuild.pending_tombstone_count(), 1);
1260 // Source graph unaffected.
1261 assert_eq!(graph.nodes().len(), ids.len());
1262 }
1263
1264 #[test]
1265 fn clone_for_rebuild_preserves_epoch_and_fact_epoch() {
1266 let (mut graph, _, _, _) = seeded_graph();
1267 graph.set_epoch(7);
1268 let rebuild = graph.clone_for_rebuild();
1269 assert_eq!(rebuild.prior_epoch(), 7);
1270 assert_eq!(rebuild.fact_epoch, graph.fact_epoch());
1271 }
1272
1273 // ---- Step-level finalize tests ---------------------------------
1274
1275 #[test]
1276 fn finalize_step11_bumps_epoch_by_one() {
1277 let (mut graph, _, _, _) = seeded_graph();
1278 graph.set_epoch(41);
1279 let rebuild = graph.clone_for_rebuild();
1280 let finalized = rebuild.finalize().expect("finalize ok");
1281 assert_eq!(finalized.epoch(), 42);
1282 }
1283
1284 #[test]
1285 fn finalize_step8_drains_tombstones_into_drained_set() {
1286 // We use a reach-into-guts test: finalize consumes self, so we
1287 // verify the drain via a direct RebuildGraph constructed inside
1288 // this module (which has crate visibility into the field).
1289 let (graph, a, _b, _c) = seeded_graph();
1290 let mut rebuild = graph.clone_for_rebuild();
1291 rebuild.tombstone(a);
1292 assert_eq!(rebuild.pending_tombstone_count(), 1);
1293 // After finalize, the graph must not contain `a`.
1294 let finalized = rebuild.finalize().expect("finalize ok");
1295 assert!(
1296 finalized.nodes().get(a).is_none(),
1297 "tombstoned node must be gone from arena"
1298 );
1299 }
1300
1301 #[test]
1302 fn finalize_steps_2_and_4_compact_arena_and_indices_consistently() {
1303 let (graph, a, _b, c) = seeded_graph();
1304 let mut rebuild = graph.clone_for_rebuild();
1305 rebuild.tombstone(a);
1306 rebuild.tombstone(c);
1307 let finalized = rebuild.finalize().expect("finalize ok");
1308
1309 // Arena compaction (step 2): dropped nodes are gone.
1310 assert!(finalized.nodes().get(a).is_none());
1311 assert!(finalized.nodes().get(c).is_none());
1312 // Step 4 must compact the auxiliary indices in lockstep, so
1313 // the tombstoned ids do not linger in any index.
1314 use crate::graph::unified::storage::metadata::TypedMetadata;
1315 let _ = TypedMetadata::Macro(MacroNodeMetadata::default()); // silence unused import warning on some cfgs
1316 assert!(!finalized.indices().by_kind(NodeKind::Function).contains(&a));
1317 assert!(!finalized.indices().by_kind(NodeKind::Struct).contains(&c));
1318 }
1319
1320 #[test]
1321 fn finalize_step5_compacts_macro_metadata() {
1322 let (graph, a, _b, _c) = seeded_graph();
1323 let mut rebuild = graph.clone_for_rebuild();
1324 rebuild.tombstone(a);
1325 let finalized = rebuild.finalize().expect("finalize ok");
1326 // `a` had macro metadata seeded; after finalize it must be gone.
1327 assert!(finalized.macro_metadata().get_macro(a).is_none());
1328 }
1329
1330 #[test]
1331 fn finalize_step9_installs_rebuilt_csr() {
1332 // Step 9 (plan §H lines 684–691) must install a freshly built
1333 // CSR — not merely drop the stale cache. The assembled graph's
1334 // forward and reverse edge stores must both expose a `csr()`
1335 // after finalize, and the deltas must be empty (absorbed by
1336 // the swap).
1337 let (graph, _, _, _) = seeded_graph();
1338 let rebuild = graph.clone_for_rebuild();
1339 let finalized = rebuild.finalize().expect("finalize ok");
1340 assert!(
1341 finalized.edges().forward().csr().is_some(),
1342 "step 9 must install forward CSR"
1343 );
1344 assert!(
1345 finalized.edges().reverse().csr().is_some(),
1346 "step 9 must install reverse CSR"
1347 );
1348 // Deltas were absorbed by the swap.
1349 assert_eq!(finalized.edges().forward().delta_count(), 0);
1350 assert_eq!(finalized.edges().reverse().delta_count(), 0);
1351 }
1352
1353 #[test]
1354 fn finalize_step10_drops_confidence_for_removed_languages() {
1355 // Seed a graph with Rust + Python confidence; the rebuild-local
1356 // FileRegistry has zero files tagged (seeded_graph does not
1357 // register paths), so step 10 must drop both.
1358 let (mut graph, _, _, _) = seeded_graph();
1359 graph.merge_confidence(
1360 "rust",
1361 crate::confidence::ConfidenceMetadata {
1362 level: crate::confidence::ConfidenceLevel::Verified,
1363 ..Default::default()
1364 },
1365 );
1366 graph.merge_confidence(
1367 "python",
1368 crate::confidence::ConfidenceMetadata {
1369 level: crate::confidence::ConfidenceLevel::Partial,
1370 ..Default::default()
1371 },
1372 );
1373 assert_eq!(graph.confidence().len(), 2);
1374
1375 let rebuild = graph.clone_for_rebuild();
1376 let finalized = rebuild.finalize().expect("finalize ok");
1377 assert!(
1378 finalized.confidence().is_empty(),
1379 "step 10 must drop confidence entries for languages that \
1380 have no live files and no recorded limitations"
1381 );
1382 }
1383
1384 #[test]
1385 fn finalize_step10_preserves_confidence_with_limitations() {
1386 // A confidence entry that encodes deliberate limitations must
1387 // survive a zero-file rebuild: dropping it would silently lose
1388 // plugin-recorded analysis state.
1389 let (mut graph, _, _, _) = seeded_graph();
1390 graph.merge_confidence(
1391 "rust",
1392 crate::confidence::ConfidenceMetadata {
1393 level: crate::confidence::ConfidenceLevel::AstOnly,
1394 limitations: vec!["no rust-analyzer".to_string()],
1395 unavailable_features: vec!["type inference".to_string()],
1396 },
1397 );
1398 let rebuild = graph.clone_for_rebuild();
1399 let finalized = rebuild.finalize().expect("finalize ok");
1400 assert_eq!(finalized.confidence().len(), 1);
1401 assert!(finalized.confidence().contains_key("rust"));
1402 }
1403
1404 #[test]
1405 fn finalize_step1_canonicalises_interner_via_dedup() {
1406 // Freeze step must leave the interner's lookup consistent
1407 // (non-stale) and zero duplicate slots after finalize.
1408 let (graph, _, _, _) = seeded_graph();
1409 let rebuild = graph.clone_for_rebuild();
1410 let finalized = rebuild.finalize().expect("finalize ok");
1411 assert!(
1412 !finalized.strings().is_lookup_stale(),
1413 "step 1 freeze must leave lookup_stale == false"
1414 );
1415 }
1416
1417 #[test]
1418 fn finalize_is_infallible_on_empty_tombstone_set() {
1419 let (graph, _, _, _) = seeded_graph();
1420 let rebuild = graph.clone_for_rebuild();
1421 let result = rebuild.finalize();
1422 assert!(result.is_ok(), "empty-tombstone finalize must succeed");
1423 let finalized = result.unwrap();
1424 // Every node survives.
1425 assert_eq!(finalized.nodes().len(), 3);
1426 }
1427
1428 #[test]
1429 fn finalize_survives_interner_snapshot_unchanged_when_no_edits() {
1430 let (graph, _, _, _) = seeded_graph();
1431 let prior_string_count = graph.strings().len();
1432 let rebuild = graph.clone_for_rebuild();
1433 let finalized = rebuild.finalize().expect("finalize ok");
1434 assert_eq!(
1435 finalized.strings().len(),
1436 prior_string_count,
1437 "freeze step must preserve string count across a no-op rebuild"
1438 );
1439 }
1440
1441 #[test]
1442 fn rebuild_graph_pending_tombstone_count_is_accurate() {
1443 let (graph, a, b, c) = seeded_graph();
1444 let mut rebuild = graph.clone_for_rebuild();
1445 assert_eq!(rebuild.pending_tombstone_count(), 0);
1446 rebuild.tombstone(a);
1447 rebuild.tombstone(b);
1448 rebuild.tombstone(c);
1449 // Inserting the same id again is idempotent.
1450 rebuild.tombstone(a);
1451 assert_eq!(rebuild.pending_tombstone_count(), 3);
1452 }
1453
1454 // ----------------------------------------------------------------
1455 // Iter-2 B1: StringId remap across every StringId-backed holder.
1456 // ----------------------------------------------------------------
1457
1458 /// Construct a RebuildGraph whose interner has intentional duplicate
1459 /// `StringId` slots (two slots containing the same canonical
1460 /// "symbol_dup" bytes), with the **live** state referencing both
1461 /// slots. `build_dedup_table()` must collapse them, step 1 must
1462 /// rewrite every StringId-backed surface through the remap, and
1463 /// step 4 must leave no bucket pointing at a recycled StringId.
1464 #[test]
1465 fn step1_remaps_auxiliary_indices_keys_through_dedup_table() {
1466 use crate::graph::unified::storage::interner::StringInterner;
1467
1468 let mut graph = CodeGraph::new();
1469 // Intern "alpha" twice via the bulk path so the two slots are
1470 // NOT coalesced at intern time. We emulate that by manually
1471 // using `alloc_range` + direct bulk_slices_mut assignment on
1472 // the interner.
1473 let interner: &mut StringInterner = graph.strings_mut();
1474 let start = interner.alloc_range(2).expect("alloc range");
1475 {
1476 let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1477 s_slots[0] = Some(std::sync::Arc::from("alpha"));
1478 s_slots[1] = Some(std::sync::Arc::from("alpha"));
1479 rc_slots[0] = 1;
1480 rc_slots[1] = 1;
1481 }
1482 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1483 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1484 let file = FileId::new(1);
1485
1486 // Allocate one node that references the duplicate StringId so
1487 // after dedup the arena still points at a canonical id.
1488 let mut entry = NodeEntry::new(NodeKind::Function, id_dup, file);
1489 entry.qualified_name = Some(id_dup);
1490 let node = graph.nodes_mut().alloc(entry).expect("alloc");
1491 graph
1492 .indices_mut()
1493 .add(node, NodeKind::Function, id_dup, Some(id_dup), file);
1494
1495 let rebuild = graph.clone_for_rebuild();
1496 let finalized = rebuild.finalize().expect("finalize ok");
1497
1498 // After step 1 the duplicate slot must be recycled and every
1499 // live surface must reference the canonical slot.
1500 // (a) Arena field references canonical
1501 let entry = finalized.nodes().get(node).expect("node survives");
1502 assert_eq!(entry.name, id_canon, "NodeEntry.name must be canonicalised");
1503 assert_eq!(
1504 entry.qualified_name,
1505 Some(id_canon),
1506 "NodeEntry.qualified_name must be canonicalised"
1507 );
1508 // (b) AuxiliaryIndices: by_name must find the node under canonical id.
1509 assert!(
1510 finalized.indices().by_name(id_canon).contains(&node),
1511 "indices.by_name under canonical StringId must contain the node"
1512 );
1513 // (c) And must NOT have any bucket under the duplicate StringId.
1514 assert!(
1515 finalized.indices().by_name(id_dup).is_empty(),
1516 "duplicate StringId bucket must be empty after remap"
1517 );
1518 assert!(
1519 finalized.indices().by_qualified_name(id_dup).is_empty(),
1520 "duplicate StringId qname bucket must be empty after remap"
1521 );
1522 // (d) Interner has no stale lookup.
1523 assert!(!finalized.strings().is_lookup_stale());
1524 }
1525
1526 #[test]
1527 fn step1_merges_aux_indices_buckets_when_keys_collapse() {
1528 use crate::graph::unified::storage::interner::StringInterner;
1529
1530 let mut graph = CodeGraph::new();
1531 let interner: &mut StringInterner = graph.strings_mut();
1532 let start = interner.alloc_range(2).expect("alloc range");
1533 {
1534 let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1535 s_slots[0] = Some(std::sync::Arc::from("beta"));
1536 s_slots[1] = Some(std::sync::Arc::from("beta"));
1537 rc_slots[0] = 1;
1538 rc_slots[1] = 1;
1539 }
1540 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1541 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1542 let file = FileId::new(1);
1543
1544 // Two nodes: one references canonical, one references duplicate.
1545 // After remap, both must land in the canonical bucket of
1546 // `name_index` (merged, deduplicated by NodeId).
1547 let node_canon = graph
1548 .nodes_mut()
1549 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file))
1550 .expect("alloc canon");
1551 let node_dup = graph
1552 .nodes_mut()
1553 .alloc(NodeEntry::new(NodeKind::Function, id_dup, file))
1554 .expect("alloc dup");
1555 graph
1556 .indices_mut()
1557 .add(node_canon, NodeKind::Function, id_canon, None, file);
1558 graph
1559 .indices_mut()
1560 .add(node_dup, NodeKind::Function, id_dup, None, file);
1561
1562 let rebuild = graph.clone_for_rebuild();
1563 let finalized = rebuild.finalize().expect("finalize ok");
1564
1565 let canonical_bucket = finalized.indices().by_name(id_canon);
1566 assert!(canonical_bucket.contains(&node_canon));
1567 assert!(canonical_bucket.contains(&node_dup));
1568 // No duplicates within the merged bucket.
1569 let mut seen = std::collections::HashSet::new();
1570 for id in canonical_bucket {
1571 assert!(seen.insert(*id), "bucket must be dedup'd by NodeId");
1572 }
1573 assert!(
1574 finalized.indices().by_name(id_dup).is_empty(),
1575 "collapsed duplicate key bucket must be removed"
1576 );
1577 }
1578
1579 #[test]
1580 fn step1_remaps_file_registry_source_uri() {
1581 use crate::graph::unified::storage::interner::StringInterner;
1582
1583 let mut graph = CodeGraph::new();
1584 // Force two identical "file:///foo" StringId slots.
1585 let interner: &mut StringInterner = graph.strings_mut();
1586 let start = interner.alloc_range(2).expect("alloc range");
1587 {
1588 let (s, rc) = interner.bulk_slices_mut(start, 2);
1589 s[0] = Some(std::sync::Arc::from("file:///foo"));
1590 s[1] = Some(std::sync::Arc::from("file:///foo"));
1591 rc[0] = 1;
1592 rc[1] = 1;
1593 }
1594 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1595 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1596
1597 let fid = graph
1598 .files_mut()
1599 .register_external_with_uri(
1600 "/virtual/Foo.class",
1601 Some(crate::graph::node::Language::Java),
1602 Some(id_dup),
1603 )
1604 .expect("register external");
1605
1606 let rebuild = graph.clone_for_rebuild();
1607 let finalized = rebuild.finalize().expect("finalize ok");
1608
1609 let view = finalized
1610 .files()
1611 .file_provenance(fid)
1612 .expect("provenance present");
1613 assert_eq!(
1614 view.source_uri,
1615 Some(id_canon),
1616 "FileRegistry source_uri must be rewritten to canonical StringId"
1617 );
1618 }
1619
1620 // ----------------------------------------------------------------
1621 // Iter-4 B1: edge-store StringId remap across delta + CSR tiers.
1622 // ----------------------------------------------------------------
1623
1624 /// Construct a `RebuildGraph` whose interner has three slots
1625 /// containing the same canonical bytes ("symbol_edge_dup"), with
1626 /// two distinct `EdgeKind::Imports { alias }` edges pointing at
1627 /// different duplicate slots. Step 1 must rewrite both aliases
1628 /// through the dedup table so the post-step-9 CSR references only
1629 /// the canonical slot; the duplicate slots must be recycled by
1630 /// `recycle_unreferenced` (ref_count == 0) and the canonical slot
1631 /// must retain references.
1632 ///
1633 /// This test intentionally exercises both the delta-tier write path
1634 /// (`DeltaBuffer::iter_mut`) *during* step 1 (where the edges still
1635 /// live in the delta buffer because no compaction has been run)
1636 /// and the CSR-tier read path (where the edges land after step 9's
1637 /// `swap_csrs_and_clear_deltas`). The companion test
1638 /// `step1_remaps_edge_kind_string_ids_in_csr_tier` covers the
1639 /// symmetric case where edges start in the CSR tier at the time
1640 /// step 1 runs.
1641 #[test]
1642 fn step1_remaps_edge_kind_string_ids_through_dedup_table() {
1643 use crate::graph::unified::edge::EdgeKind;
1644 use crate::graph::unified::storage::interner::StringInterner;
1645
1646 let mut graph = CodeGraph::new();
1647 let file_a = FileId::new(1);
1648 // Force three slots carrying the same canonical bytes so
1649 // `build_dedup_table()` collapses slot[start+1] and slot[start+2]
1650 // onto slot[start].
1651 let interner: &mut StringInterner = graph.strings_mut();
1652 let start = interner.alloc_range(3).expect("alloc range");
1653 {
1654 let (s, rc) = interner.bulk_slices_mut(start, 3);
1655 s[0] = Some(std::sync::Arc::from("symbol_edge_dup"));
1656 s[1] = Some(std::sync::Arc::from("symbol_edge_dup"));
1657 s[2] = Some(std::sync::Arc::from("symbol_edge_dup"));
1658 rc[0] = 1;
1659 rc[1] = 1;
1660 rc[2] = 1;
1661 }
1662 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1663 let id_dup_1 = crate::graph::unified::string::id::StringId::new(start + 1);
1664 let id_dup_2 = crate::graph::unified::string::id::StringId::new(start + 2);
1665
1666 // Allocate two nodes so we can hang two edges off distinct
1667 // (src, tgt) pairs and verify both are remapped.
1668 let src;
1669 let tgt;
1670 {
1671 let arena = graph.nodes_mut();
1672 src = arena
1673 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1674 .expect("alloc src");
1675 tgt = arena
1676 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1677 .expect("alloc tgt");
1678 }
1679 graph
1680 .indices_mut()
1681 .add(src, NodeKind::Function, id_canon, None, file_a);
1682 graph
1683 .indices_mut()
1684 .add(tgt, NodeKind::Function, id_canon, None, file_a);
1685
1686 // Edge 1: Imports { alias = id_dup_1 } — forward src→tgt.
1687 graph.edges_mut().add_edge(
1688 src,
1689 tgt,
1690 EdgeKind::Imports {
1691 alias: Some(id_dup_1),
1692 is_wildcard: false,
1693 },
1694 file_a,
1695 );
1696 // Edge 2: Imports { alias = id_dup_2 } — forward tgt→src (so the
1697 // two edges are distinct keys in the store).
1698 graph.edges_mut().add_edge(
1699 tgt,
1700 src,
1701 EdgeKind::Imports {
1702 alias: Some(id_dup_2),
1703 is_wildcard: true,
1704 },
1705 file_a,
1706 );
1707
1708 // Pre-finalize sanity: the edges live in the delta tier.
1709 assert!(
1710 graph.edges().forward().delta_count() >= 2,
1711 "pre-finalize: forward delta must hold both Imports edges"
1712 );
1713 assert!(
1714 graph.edges().reverse().delta_count() >= 2,
1715 "pre-finalize: reverse delta must hold the mirror of both Imports edges"
1716 );
1717
1718 let rebuild = graph.clone_for_rebuild();
1719 let finalized = rebuild.finalize().expect("finalize ok");
1720
1721 // After finalize step 9, edges have been absorbed into the CSR
1722 // tier by `swap_csrs_and_clear_deltas`. Step 1's `EdgeKind`
1723 // rewrite ran on the delta *before* step 9 snapshotted it, so
1724 // the CSR we assert on is built from the already-remapped delta.
1725 //
1726 // Forward CSR: every `Imports` alias must be canonical.
1727 let forward = finalized.edges().forward();
1728 let fwd_csr = forward
1729 .csr()
1730 .expect("forward CSR must be populated after step 9");
1731 let mut fwd_imports_seen = 0usize;
1732 for kind in fwd_csr.edge_kind() {
1733 if let EdgeKind::Imports { alias, .. } = kind {
1734 fwd_imports_seen += 1;
1735 assert_ne!(
1736 *alias,
1737 Some(id_dup_1),
1738 "forward CSR Imports alias must not reference pre-dedup slot id_dup_1"
1739 );
1740 assert_ne!(
1741 *alias,
1742 Some(id_dup_2),
1743 "forward CSR Imports alias must not reference pre-dedup slot id_dup_2"
1744 );
1745 assert_eq!(
1746 *alias,
1747 Some(id_canon),
1748 "forward CSR Imports alias must be canonicalised"
1749 );
1750 }
1751 }
1752 drop(forward);
1753 assert!(
1754 fwd_imports_seen >= 2,
1755 "forward CSR must hold both Imports edges after finalize (saw {fwd_imports_seen})"
1756 );
1757
1758 // Reverse CSR: mirror edges must be canonicalised identically.
1759 let reverse = finalized.edges().reverse();
1760 let rev_csr = reverse
1761 .csr()
1762 .expect("reverse CSR must be populated after step 9");
1763 let mut rev_imports_seen = 0usize;
1764 for kind in rev_csr.edge_kind() {
1765 if let EdgeKind::Imports { alias, .. } = kind {
1766 rev_imports_seen += 1;
1767 assert_ne!(*alias, Some(id_dup_1));
1768 assert_ne!(*alias, Some(id_dup_2));
1769 assert_eq!(*alias, Some(id_canon));
1770 }
1771 }
1772 drop(reverse);
1773 assert!(
1774 rev_imports_seen >= 2,
1775 "reverse CSR must mirror the forward Imports edges (saw {rev_imports_seen})"
1776 );
1777
1778 // Step-1 (c) invariant: after `recycle_unreferenced`, the
1779 // duplicate slots must report `ref_count == 0`. If step 1's
1780 // edge-store remap had skipped `EdgeKind` payloads, the
1781 // duplicates' refcounts would stay positive (held by the edge
1782 // store) and recycle would not reclaim them.
1783 assert_eq!(
1784 finalized.strings().ref_count(id_dup_1),
1785 0,
1786 "duplicate slot id_dup_1 must be recycled (ref_count == 0) by step 1"
1787 );
1788 assert_eq!(
1789 finalized.strings().ref_count(id_dup_2),
1790 0,
1791 "duplicate slot id_dup_2 must be recycled (ref_count == 0) by step 1"
1792 );
1793 assert!(
1794 finalized.strings().ref_count(id_canon) > 0,
1795 "canonical slot must retain references from the two Imports edges (got {})",
1796 finalized.strings().ref_count(id_canon)
1797 );
1798 }
1799
1800 /// Same construction as above, but compact the edges into a CSR tier
1801 /// before calling finalize so the rewrite covers the steady-state
1802 /// `CsrGraph::edge_kind` path. This exercises the branch of
1803 /// `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
1804 /// that is otherwise dead in the tests above.
1805 #[test]
1806 fn step1_remaps_edge_kind_string_ids_in_csr_tier() {
1807 use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
1808 use crate::graph::unified::edge::EdgeKind;
1809 use crate::graph::unified::storage::interner::StringInterner;
1810
1811 let mut graph = CodeGraph::new();
1812 let file_a = FileId::new(1);
1813 let interner: &mut StringInterner = graph.strings_mut();
1814 let start = interner.alloc_range(2).expect("alloc range");
1815 {
1816 let (s, rc) = interner.bulk_slices_mut(start, 2);
1817 s[0] = Some(std::sync::Arc::from("csr_alias_dup"));
1818 s[1] = Some(std::sync::Arc::from("csr_alias_dup"));
1819 rc[0] = 1;
1820 rc[1] = 1;
1821 }
1822 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1823 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1824
1825 let src;
1826 let tgt;
1827 {
1828 let arena = graph.nodes_mut();
1829 src = arena
1830 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1831 .expect("alloc src");
1832 tgt = arena
1833 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1834 .expect("alloc tgt");
1835 }
1836 graph
1837 .indices_mut()
1838 .add(src, NodeKind::Function, id_canon, None, file_a);
1839 graph
1840 .indices_mut()
1841 .add(tgt, NodeKind::Function, id_canon, None, file_a);
1842
1843 // Push the Imports edge into delta with alias pointing at the
1844 // duplicate StringId slot.
1845 graph.edges_mut().add_edge(
1846 src,
1847 tgt,
1848 EdgeKind::Imports {
1849 alias: Some(id_dup),
1850 is_wildcard: false,
1851 },
1852 file_a,
1853 );
1854
1855 // Compact delta → CSR on both directions. This mirrors what the
1856 // full-build pipeline does after Phase 4d plus an explicit
1857 // compaction step.
1858 let node_count = 2;
1859 let edges = graph.edges_mut();
1860 let fwd_snap = snapshot_edges(&edges.forward(), node_count);
1861 let (forward_csr, _) =
1862 build_compacted_csr(&fwd_snap, Direction::Forward).expect("forward csr");
1863 let rev_snap = snapshot_edges(&edges.reverse(), node_count);
1864 let (reverse_csr, _) =
1865 build_compacted_csr(&rev_snap, Direction::Reverse).expect("reverse csr");
1866 edges.swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
1867 assert!(
1868 edges.forward().csr().is_some(),
1869 "forward CSR must be present"
1870 );
1871 assert!(
1872 edges.reverse().csr().is_some(),
1873 "reverse CSR must be present"
1874 );
1875 assert_eq!(edges.forward().delta_count(), 0, "delta must be empty");
1876 assert_eq!(edges.reverse().delta_count(), 0, "delta must be empty");
1877
1878 let rebuild = graph.clone_for_rebuild();
1879 let finalized = rebuild.finalize().expect("finalize ok");
1880
1881 // CSR tier: every `edge_kind` entry must reference the canonical
1882 // StringId, never the duplicate.
1883 let forward = finalized.edges().forward();
1884 let csr = forward
1885 .csr()
1886 .expect("forward CSR must survive finalize step 1");
1887 let mut imports_seen = 0usize;
1888 for kind in csr.edge_kind() {
1889 if let EdgeKind::Imports { alias, .. } = kind {
1890 imports_seen += 1;
1891 assert_ne!(
1892 *alias,
1893 Some(id_dup),
1894 "CSR Imports alias must not reference pre-dedup duplicate"
1895 );
1896 assert_eq!(
1897 *alias,
1898 Some(id_canon),
1899 "CSR Imports alias must be canonicalised"
1900 );
1901 }
1902 }
1903 assert!(
1904 imports_seen > 0,
1905 "forward CSR must hold at least one Imports edge"
1906 );
1907 drop(forward);
1908
1909 // Reverse CSR — same guarantee.
1910 let reverse = finalized.edges().reverse();
1911 let rev_csr = reverse
1912 .csr()
1913 .expect("reverse CSR must survive finalize step 1");
1914 let mut rev_imports_seen = 0usize;
1915 for kind in rev_csr.edge_kind() {
1916 if let EdgeKind::Imports { alias, .. } = kind {
1917 rev_imports_seen += 1;
1918 assert_ne!(*alias, Some(id_dup));
1919 assert_eq!(*alias, Some(id_canon));
1920 }
1921 }
1922 assert!(rev_imports_seen > 0, "reverse CSR must hold Imports edges");
1923 drop(reverse);
1924
1925 assert_eq!(
1926 finalized.strings().ref_count(id_dup),
1927 0,
1928 "duplicate slot must be recycled (ref_count == 0) by step 1"
1929 );
1930 }
1931
1932 /// Exhaustive coverage: every `StringId`-holding surface *reachable
1933 /// through `CodeGraph`'s public mutation API* must be rewritten by
1934 /// finalize step 1. This test drives each surface with a distinct
1935 /// duplicate `StringId` slot and verifies after finalize that (i)
1936 /// every surface references the canonical slot, never the duplicate;
1937 /// (ii) every duplicate slot is recycled (`ref_count == 0`); (iii)
1938 /// the canonical slot retains a positive ref count.
1939 ///
1940 /// Surfaces exercised:
1941 /// * **Arena** (`NodeEntry.name`, `NodeEntry.qualified_name`)
1942 /// * **AuxiliaryIndices** (`name_index` / `qualified_name_index` keys)
1943 /// * **FileRegistry** (`FileEntry.source_uri`)
1944 /// * **BidirectionalEdgeStore** (`EdgeKind::Imports.alias` — delta tier)
1945 ///
1946 /// `AliasTable` and `ShadowTable` StringId fields live under private
1947 /// API (`pub(crate) fn set_alias_table`, no public entry mutators)
1948 /// and are already covered by the iter-2 `step1_remaps_*` tests above
1949 /// that exercise their `rewrite_string_ids_through_remap` helpers
1950 /// end-to-end through the binding-plane derivation path. Covering
1951 /// them here would require either exposing additional test-only APIs
1952 /// or duplicating the iter-2 coverage; neither adds assurance beyond
1953 /// what the dedicated iter-2 tests already provide.
1954 #[test]
1955 fn step1_remaps_all_stringid_holders_exhaustively() {
1956 use crate::graph::unified::edge::EdgeKind;
1957 use crate::graph::unified::storage::interner::StringInterner;
1958
1959 let mut graph = CodeGraph::new();
1960 let file_a = FileId::new(1);
1961 let interner: &mut StringInterner = graph.strings_mut();
1962 // 10 slots = 5 surfaces × 2 (canon + dup) interleaved.
1963 let start = interner.alloc_range(10).expect("alloc range");
1964 {
1965 let (s, rc) = interner.bulk_slices_mut(start, 10);
1966 for (i, label) in [
1967 "arena_name",
1968 "arena_name",
1969 "arena_qname",
1970 "arena_qname",
1971 "idx_key",
1972 "idx_key",
1973 "edge_alias",
1974 "edge_alias",
1975 "file_uri",
1976 "file_uri",
1977 ]
1978 .iter()
1979 .enumerate()
1980 {
1981 s[i] = Some(std::sync::Arc::from(*label));
1982 rc[i] = 1;
1983 }
1984 }
1985 let sid = |off: u32| crate::graph::unified::string::id::StringId::new(start + off);
1986 let arena_name_canon = sid(0);
1987 let arena_name_dup = sid(1);
1988 let arena_qname_canon = sid(2);
1989 let arena_qname_dup = sid(3);
1990 let idx_key_canon = sid(4);
1991 let idx_key_dup = sid(5);
1992 let edge_alias_canon = sid(6);
1993 let edge_alias_dup = sid(7);
1994 let file_uri_canon = sid(8);
1995 let file_uri_dup = sid(9);
1996
1997 // (a) Arena — name + qualified_name reference duplicate slots.
1998 let node;
1999 let node2;
2000 {
2001 let arena = graph.nodes_mut();
2002 let mut entry = NodeEntry::new(NodeKind::Function, arena_name_dup, file_a);
2003 entry.qualified_name = Some(arena_qname_dup);
2004 node = arena.alloc(entry).expect("alloc arena node");
2005 node2 = arena
2006 .alloc(NodeEntry::new(NodeKind::Function, arena_name_dup, file_a))
2007 .expect("alloc arena node2");
2008 }
2009 // (b) Indices — key under the duplicate StringId.
2010 graph
2011 .indices_mut()
2012 .add(node, NodeKind::Function, idx_key_dup, None, file_a);
2013 graph
2014 .indices_mut()
2015 .add(node2, NodeKind::Function, idx_key_dup, None, file_a);
2016 // (c) File registry — register a file with source_uri = dup.
2017 let fid = graph
2018 .files_mut()
2019 .register_external_with_uri(
2020 "/virtual/Exhaustive.class",
2021 Some(crate::graph::node::Language::Java),
2022 Some(file_uri_dup),
2023 )
2024 .expect("register external");
2025 // (d) Edge store — Imports edge with alias = dup.
2026 graph.edges_mut().add_edge(
2027 node,
2028 node2,
2029 EdgeKind::Imports {
2030 alias: Some(edge_alias_dup),
2031 is_wildcard: false,
2032 },
2033 file_a,
2034 );
2035
2036 let rebuild = graph.clone_for_rebuild();
2037 let finalized = rebuild.finalize().expect("finalize ok");
2038
2039 // (a) Arena: name + qualified_name on every live node.
2040 for nid in [node, node2] {
2041 let entry = finalized.nodes().get(nid).expect("node survives");
2042 assert_eq!(entry.name, arena_name_canon, "arena name canonicalised");
2043 if let Some(q) = entry.qualified_name {
2044 assert_eq!(q, arena_qname_canon, "arena qname canonicalised");
2045 }
2046 }
2047 // (b) Indices: canonical bucket populated, duplicate bucket empty.
2048 assert!(finalized.indices().by_name(idx_key_dup).is_empty());
2049 assert!(!finalized.indices().by_name(idx_key_canon).is_empty());
2050 // (c) File registry.
2051 let view = finalized
2052 .files()
2053 .file_provenance(fid)
2054 .expect("provenance present");
2055 assert_eq!(view.source_uri, Some(file_uri_canon));
2056 // (d) Edge store: every Imports alias canonical on both
2057 // directions. Post-finalize, edges live in CSR (step 9 drained
2058 // the delta into the CSR on both directions). Step 1's remap
2059 // ran on the delta *before* step 9, so the CSR is built from
2060 // the already-remapped delta.
2061 let fwd = finalized.edges().forward();
2062 let fwd_csr = fwd
2063 .csr()
2064 .expect("forward CSR must be populated after step 9");
2065 let mut fwd_imports = 0usize;
2066 for kind in fwd_csr.edge_kind() {
2067 if let EdgeKind::Imports { alias, .. } = kind {
2068 fwd_imports += 1;
2069 assert_ne!(*alias, Some(edge_alias_dup));
2070 assert_eq!(*alias, Some(edge_alias_canon));
2071 }
2072 }
2073 drop(fwd);
2074 assert!(fwd_imports > 0, "forward Imports must be present in CSR");
2075 let rev = finalized.edges().reverse();
2076 let rev_csr = rev
2077 .csr()
2078 .expect("reverse CSR must be populated after step 9");
2079 let mut rev_imports = 0usize;
2080 for kind in rev_csr.edge_kind() {
2081 if let EdgeKind::Imports { alias, .. } = kind {
2082 rev_imports += 1;
2083 assert_ne!(*alias, Some(edge_alias_dup));
2084 assert_eq!(*alias, Some(edge_alias_canon));
2085 }
2086 }
2087 drop(rev);
2088 assert!(rev_imports > 0, "reverse Imports must be present in CSR");
2089
2090 // All duplicate slots recycled; canonical slots retained.
2091 for (dup, canon, label) in [
2092 (arena_name_dup, arena_name_canon, "arena_name"),
2093 (arena_qname_dup, arena_qname_canon, "arena_qname"),
2094 (idx_key_dup, idx_key_canon, "idx_key"),
2095 (edge_alias_dup, edge_alias_canon, "edge_alias"),
2096 (file_uri_dup, file_uri_canon, "file_uri"),
2097 ] {
2098 assert_eq!(
2099 finalized.strings().ref_count(dup),
2100 0,
2101 "{label}: duplicate slot must be recycled (ref_count == 0)"
2102 );
2103 assert!(
2104 finalized.strings().ref_count(canon) > 0,
2105 "{label}: canonical slot must retain references (got {})",
2106 finalized.strings().ref_count(canon)
2107 );
2108 }
2109 }
2110
2111 // ----------------------------------------------------------------
2112 // Iter-2 B2: step 6 + step 13 per_file_nodes / bucket bijection.
2113 // ----------------------------------------------------------------
2114
2115 #[test]
2116 fn finalize_step6_drops_tombstoned_nodes_from_buckets() {
2117 let (graph, a, b, c) = seeded_graph();
2118 // Seed the registry buckets so finalize step 6 has real work.
2119 let file_a = FileId::new(1);
2120 let file_b = FileId::new(2);
2121 {
2122 // Direct access via clone-for-rebuild — we need to populate
2123 // the REBUILD-local registry because `seeded_graph` doesn't.
2124 }
2125 // Populate buckets on the graph first so clone captures them.
2126 {
2127 let files = graph.files();
2128 let _ = files; // ensure immut ref scope
2129 }
2130 let mut graph = graph;
2131 graph.files_mut().record_node(file_a, a);
2132 graph.files_mut().record_node(file_a, b);
2133 graph.files_mut().record_node(file_b, c);
2134
2135 let mut rebuild = graph.clone_for_rebuild();
2136 rebuild.tombstone(a);
2137 rebuild.tombstone(c);
2138 let finalized = rebuild.finalize().expect("finalize ok");
2139
2140 // a and c were tombstoned — must be absent from any bucket.
2141 // b survives under file_a; file_b's bucket collapsed to empty
2142 // and must be dropped.
2143 let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2144 finalized.files().per_file_nodes_for_gate0d().collect();
2145 assert_eq!(buckets.len(), 1, "empty bucket must be dropped");
2146 assert_eq!(buckets.get(&file_a).cloned().unwrap_or_default(), vec![b]);
2147 assert!(!buckets.contains_key(&file_b));
2148 }
2149
2150 #[test]
2151 fn finalize_step6_dedups_within_bucket() {
2152 let (mut graph, a, b, c) = seeded_graph();
2153 let file_a = FileId::new(1);
2154 let file_b = FileId::new(2);
2155 // Bucket every live node consistently with seeded_graph's own
2156 // FileId assignment (so the non-vacuous bijection check passes),
2157 // but duplicate `a` once to exercise step 6's dedup path.
2158 graph.files_mut().record_node(file_a, a);
2159 graph.files_mut().record_node(file_a, a); // intentional duplicate
2160 graph.files_mut().record_node(file_a, b);
2161 graph.files_mut().record_node(file_b, c);
2162 let rebuild = graph.clone_for_rebuild();
2163 let finalized = rebuild.finalize().expect("finalize ok");
2164 let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2165 finalized.files().per_file_nodes_for_gate0d().collect();
2166 let bucket_a = buckets.get(&file_a).expect("bucket for file_a");
2167 // Two live nodes (a, b) in file_a, duplicate `a` collapsed.
2168 assert_eq!(
2169 bucket_a.len(),
2170 2,
2171 "duplicates within bucket must be dedup'd"
2172 );
2173 assert!(bucket_a.contains(&a));
2174 assert!(bucket_a.contains(&b));
2175 }
2176
2177 #[test]
2178 fn finalize_step6_drops_empty_buckets() {
2179 let (mut graph, a, _b, _c) = seeded_graph();
2180 let file = FileId::new(1);
2181 graph.files_mut().record_node(file, a);
2182 let mut rebuild = graph.clone_for_rebuild();
2183 rebuild.tombstone(a); // drops the only bucket member
2184 let finalized = rebuild.finalize().expect("finalize ok");
2185 assert_eq!(
2186 finalized.files().per_file_bucket_count(),
2187 0,
2188 "empty bucket must be dropped by step 6"
2189 );
2190 }
2191
2192 #[test]
2193 fn bucket_bijection_passes_when_every_live_node_is_bucketed() {
2194 let (mut graph, a, b, c) = seeded_graph();
2195 // Bucket every live node consistently with each node's FileId.
2196 // seeded_graph uses file_a=1 for a, b and file_b=2 for c.
2197 let file_a = FileId::new(1);
2198 let file_b = FileId::new(2);
2199 graph.files_mut().record_node(file_a, a);
2200 graph.files_mut().record_node(file_a, b);
2201 graph.files_mut().record_node(file_b, c);
2202
2203 // Round-trip through finalize so the bijection check runs.
2204 let rebuild = graph.clone_for_rebuild();
2205 let finalized = rebuild.finalize().expect("finalize ok");
2206 // Explicit assert_bucket_bijection call also passes.
2207 finalized.assert_bucket_bijection();
2208 }
2209
2210 #[test]
2211 #[should_panic(expected = "duplicate node")]
2212 fn bucket_bijection_detects_duplicate_within_bucket() {
2213 let (mut graph, a, _b, _c) = seeded_graph();
2214 let file_a = FileId::new(1);
2215 // Manually force a duplicate inside a bucket by bypassing
2216 // `retain_nodes_in_buckets`. We achieve this by calling
2217 // `record_node` twice on the live graph and then invoking the
2218 // bijection check directly *without* routing through finalize
2219 // (which would dedup in step 6).
2220 graph.files_mut().record_node(file_a, a);
2221 graph.files_mut().record_node(file_a, a);
2222 graph.assert_bucket_bijection();
2223 }
2224
2225 #[test]
2226 #[should_panic(expected = "misfiled")]
2227 fn bucket_bijection_detects_misfiled_node() {
2228 let (mut graph, a, _b, _c) = seeded_graph();
2229 // Node `a` was allocated against FileId(1); put it in a bucket
2230 // keyed by FileId(99) — the bijection must reject with the
2231 // precise "misfiled" panic message emitted by
2232 // `CodeGraph::assert_bucket_bijection` (concurrent/graph.rs
2233 // around line 865). A broader `expected = "node"` would also
2234 // match the duplicate-node / dead-node / multi-bucket arms, so
2235 // it would not discriminate the specific failure mode under test.
2236 graph.files_mut().record_node(FileId::new(99), a);
2237 graph.assert_bucket_bijection();
2238 }
2239
2240 #[test]
2241 #[should_panic(expected = "absent from all buckets")]
2242 fn bucket_bijection_detects_missing_live_node() {
2243 let (mut graph, a, _b, c) = seeded_graph();
2244 // Bucket one node but not the other; with at least one populated
2245 // bucket, condition (d) becomes strict and the missing live node
2246 // must trigger a panic.
2247 let file_a = FileId::new(1);
2248 let _ = c;
2249 graph.files_mut().record_node(file_a, a);
2250 graph.assert_bucket_bijection();
2251 }
2252
2253 #[test]
2254 #[should_panic(expected = "dead node")]
2255 fn bucket_bijection_detects_dead_node_in_bucket() {
2256 let (mut graph, a, _b, _c) = seeded_graph();
2257 // Tombstone a, then leave it in a bucket — bijection must reject.
2258 let file_a = FileId::new(1);
2259 graph.files_mut().record_node(file_a, a);
2260 graph.nodes_mut().remove(a);
2261 graph.assert_bucket_bijection();
2262 }
2263
2264 // -----------------------------------------------------------------
2265 // Gate 0d — Tombstone-residue negative tests.
2266 //
2267 // Covers both the `RebuildGraph::assert_no_tombstone_residue`
2268 // diagnostic helper and the finalize step-14 publish-boundary
2269 // check. The bijection counterparts live in the block immediately
2270 // above.
2271 // -----------------------------------------------------------------
2272
2273 #[test]
2274 #[should_panic(expected = "still in edge store")]
2275 fn rebuild_graph_residue_detects_tombstone_still_in_edge_store() {
2276 // Gate 0d iter-1 Minor — dedicated negative test for the
2277 // edge-store residue arm.
2278 //
2279 // The residue check iterates K-rows in order (see
2280 // `assert_no_tombstone_residue` above): NodeArena → edges →
2281 // AuxiliaryIndices → macro_metadata → ... — so to exercise the
2282 // `edges` arm specifically we must remove `a` from the
2283 // NodeArena first (so the NodeArena arm does not fire), leave
2284 // the edge that references `a` intact, and tombstone it.
2285 use crate::graph::unified::edge::kind::EdgeKind;
2286 #[cfg(test)]
2287 use crate::graph::unified::edge::kind::ResolvedVia;
2288 let (graph, a, b, _c) = seeded_graph();
2289 let mut rebuild = graph.clone_for_rebuild();
2290 // Seed the edge store with an edge that references `a`. This
2291 // becomes the "dangling reference" the residue check must
2292 // detect: `a` will be removed from the arena but the edge
2293 // keeps pointing at it.
2294 rebuild.edges.add_edge(
2295 a,
2296 b,
2297 EdgeKind::Calls {
2298 argument_count: 0,
2299 is_async: false,
2300 resolved_via: ResolvedVia::Direct,
2301 },
2302 FileId::new(1),
2303 );
2304 // Remove `a` from the arena so the K.A1 arm passes.
2305 rebuild.nodes.remove(a);
2306 rebuild.tombstone(a);
2307 rebuild.assert_no_tombstone_residue();
2308 }
2309
2310 #[test]
2311 #[should_panic(expected = "still in auxiliary indices")]
2312 fn rebuild_graph_residue_detects_tombstone_still_in_auxiliary_indices() {
2313 // The residue check iterates K-rows starting at `NodeArena`, so
2314 // to exercise the `AuxiliaryIndices` arm specifically we must
2315 // first remove `a` from the arena (simulating step 2 of
2316 // finalize), leaving the `AuxiliaryIndices` stale reference
2317 // as the first arm that can trip. This reproduces the exact
2318 // failure mode the pre-finalize helper exists to catch: a bug
2319 // where finalize compacts the arena but forgets one index.
2320 let (graph, a, _b, _c) = seeded_graph();
2321 let mut rebuild = graph.clone_for_rebuild();
2322 rebuild.nodes.remove(a);
2323 rebuild.tombstone(a);
2324 rebuild.assert_no_tombstone_residue();
2325 }
2326
2327 #[test]
2328 #[should_panic(expected = "still in NodeArena")]
2329 fn rebuild_graph_residue_detects_tombstone_still_in_node_arena() {
2330 // NodeArena is the first `NodeIdBearing` in the residue check's
2331 // iteration order, so we can trip it with a raw `tombstone(a)`
2332 // call before we touch any auxiliary index. The live arena
2333 // entry for `a` remains — that is the bug we want to surface.
2334 let (graph, a, _b, _c) = seeded_graph();
2335 let mut rebuild = graph.clone_for_rebuild();
2336 // Prove the arena still contains `a` before the assertion so
2337 // the panic expectation maps to a real condition, not a vacuous
2338 // empty-tombstone skip.
2339 assert!(
2340 rebuild.nodes.get(a).is_some(),
2341 "pre-condition: arena must still contain the staged tombstone"
2342 );
2343 rebuild.tombstone(a);
2344 rebuild.assert_no_tombstone_residue();
2345 }
2346
2347 #[test]
2348 fn rebuild_graph_residue_is_noop_on_empty_tombstone_set() {
2349 // Positive: with no tombstones staged, the assertion must be a
2350 // no-op — otherwise every fresh `clone_for_rebuild` would panic.
2351 let (graph, _a, _b, _c) = seeded_graph();
2352 let rebuild = graph.clone_for_rebuild();
2353 assert_eq!(rebuild.pending_tombstone_count(), 0);
2354 rebuild.assert_no_tombstone_residue();
2355 }
2356
2357 #[test]
2358 #[should_panic(expected = "still in NodeArena")]
2359 fn finalize_step14_residue_detects_live_reference_to_drained_node() {
2360 // Drive the `finalize` step-14 residue assertion through the
2361 // publish-boundary helper. `seeded_graph()` returns a graph in
2362 // which `a` is a live NodeArena entry; passing `a` in the
2363 // `drained` set without actually removing the arena slot
2364 // simulates the bug step 14 exists to catch — step 8 drained
2365 // `a` into `drained_tombstones`, but something failed to
2366 // compact the arena. The residue check's K-row iteration
2367 // starts at `NodeArena`, so that is the arm that fires.
2368 //
2369 // The test routes through the canonical
2370 // `publish::assert_publish_invariants` helper so the test
2371 // exercises the exact code path `finalize` step 14 executes,
2372 // not just the underlying `assert_no_tombstone_residue_for`
2373 // entry point.
2374 let (graph, a, _b, _c) = seeded_graph();
2375 let mut drained: ::std::collections::HashSet<NodeId> = ::std::collections::HashSet::new();
2376 drained.insert(a);
2377 crate::graph::unified::publish::assert_publish_invariants(&graph, &drained);
2378 }
2379
2380 #[test]
2381 fn finalize_with_empty_drained_set_passes_publish_invariants() {
2382 // Positive smoke test: the happy path through finalize (no
2383 // tombstones staged) must pass `assert_publish_invariants`
2384 // unconditionally on every build profile. This is the case the
2385 // full-rebuild `build_unified_graph_inner` end-of-function call
2386 // exercises on every CI run.
2387 let (graph, _a, _b, _c) = seeded_graph();
2388 let file_a = FileId::new(1);
2389 let file_b = FileId::new(2);
2390 let mut graph = graph;
2391 graph.files_mut().record_node(file_a, _a);
2392 graph.files_mut().record_node(file_a, _b);
2393 graph.files_mut().record_node(file_b, _c);
2394
2395 let rebuild = graph.clone_for_rebuild();
2396 let finalized = rebuild.finalize().expect("finalize ok");
2397 crate::graph::unified::publish::assert_publish_invariants(
2398 &finalized,
2399 &::std::collections::HashSet::new(),
2400 );
2401 }
2402
2403 // ---- Task 4 Step 3 — RebuildGraph::remove_file ------------------
2404
2405 /// Seed a graph + rebuild with 2 files × `per_file` nodes and a
2406 /// mix of intra- and cross-file `Calls` edges, then clone for
2407 /// rebuild. Returns `(rebuild, file_a, file_b, file_a_nodes,
2408 /// file_b_nodes)` — mirrors `seed_two_file_graph` in the
2409 /// `concurrent::graph::tests` module but yields the rebuild value
2410 /// so tests here can drive `RebuildGraph::remove_file` directly.
2411 fn seed_two_file_rebuild(
2412 per_file: usize,
2413 ) -> (
2414 RebuildGraph,
2415 crate::graph::unified::file::FileId,
2416 crate::graph::unified::file::FileId,
2417 Vec<NodeId>,
2418 Vec<NodeId>,
2419 ) {
2420 use crate::graph::unified::edge::{EdgeKind, ResolvedVia};
2421 use crate::graph::unified::node::NodeKind;
2422 use crate::graph::unified::storage::arena::NodeEntry;
2423 use std::path::Path;
2424
2425 let mut graph = CodeGraph::new();
2426 let sym = graph.strings_mut().intern("sym").expect("intern");
2427 let file_a = graph
2428 .files_mut()
2429 .register(Path::new("/tmp/rebuild_remove_file_test/a.rs"))
2430 .expect("register a");
2431 let file_b = graph
2432 .files_mut()
2433 .register(Path::new("/tmp/rebuild_remove_file_test/b.rs"))
2434 .expect("register b");
2435
2436 let mut file_a_nodes = Vec::with_capacity(per_file);
2437 let mut file_b_nodes = Vec::with_capacity(per_file);
2438 for _ in 0..per_file {
2439 let n = graph
2440 .nodes_mut()
2441 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
2442 .expect("alloc a");
2443 file_a_nodes.push(n);
2444 graph.files_mut().record_node(file_a, n);
2445 graph
2446 .indices_mut()
2447 .add(n, NodeKind::Function, sym, None, file_a);
2448 }
2449 for _ in 0..per_file {
2450 let n = graph
2451 .nodes_mut()
2452 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
2453 .expect("alloc b");
2454 file_b_nodes.push(n);
2455 graph.files_mut().record_node(file_b, n);
2456 graph
2457 .indices_mut()
2458 .add(n, NodeKind::Function, sym, None, file_b);
2459 }
2460 for i in 0..per_file.saturating_sub(1) {
2461 graph.edges_mut().add_edge(
2462 file_a_nodes[i],
2463 file_a_nodes[i + 1],
2464 EdgeKind::Calls {
2465 argument_count: 0,
2466 is_async: false,
2467 resolved_via: ResolvedVia::Direct,
2468 },
2469 file_a,
2470 );
2471 graph.edges_mut().add_edge(
2472 file_b_nodes[i],
2473 file_b_nodes[i + 1],
2474 EdgeKind::Calls {
2475 argument_count: 0,
2476 is_async: false,
2477 resolved_via: ResolvedVia::Direct,
2478 },
2479 file_b,
2480 );
2481 }
2482 graph.edges_mut().add_edge(
2483 file_a_nodes[0],
2484 file_b_nodes[0],
2485 EdgeKind::Calls {
2486 argument_count: 0,
2487 is_async: false,
2488 resolved_via: ResolvedVia::Direct,
2489 },
2490 file_a,
2491 );
2492 graph.edges_mut().add_edge(
2493 file_b_nodes[0],
2494 file_a_nodes[0],
2495 EdgeKind::Calls {
2496 argument_count: 0,
2497 is_async: false,
2498 resolved_via: ResolvedVia::Direct,
2499 },
2500 file_b,
2501 );
2502
2503 let rebuild = graph.clone_for_rebuild();
2504 (rebuild, file_a, file_b, file_a_nodes, file_b_nodes)
2505 }
2506
2507 #[test]
2508 fn rebuild_remove_file_tombstones_all_per_file_nodes() {
2509 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2510
2511 let returned = rebuild.remove_file(file_a);
2512
2513 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
2514 let expected_set: std::collections::HashSet<NodeId> =
2515 file_a_nodes.iter().copied().collect();
2516 assert_eq!(
2517 returned_set, expected_set,
2518 "remove_file must return exactly the file_a nodes drained from the bucket"
2519 );
2520 // Each returned NodeId is arena-gone on the rebuild.
2521 for nid in &file_a_nodes {
2522 assert!(
2523 rebuild.nodes.get(*nid).is_none(),
2524 "node {nid:?} from removed file must be tombstoned on rebuild arena"
2525 );
2526 }
2527 // And staged for the finalize-time NodeIdBearing sweep.
2528 assert_eq!(rebuild.pending_tombstone_count(), file_a_nodes.len());
2529 }
2530
2531 #[test]
2532 fn rebuild_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
2533 let (mut rebuild, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(3);
2534
2535 // Seed produces 6 forward delta edges (2 intra-A + 2 intra-B
2536 // + 2 cross). The rebuild's own forward/reverse edge stores
2537 // mirror this.
2538 assert_eq!(rebuild.edges.forward().delta().len(), 6);
2539
2540 let _ = rebuild.remove_file(file_a);
2541
2542 // After removal: only 2 intra-B edges remain in each direction.
2543 assert_eq!(rebuild.edges.forward().delta().len(), 2);
2544 assert_eq!(rebuild.edges.reverse().delta().len(), 2);
2545
2546 // Cross-file edge b[0] -> a[0] must be gone from any direction.
2547 let b0 = file_b_nodes[0];
2548 let a0 = file_a_nodes[0];
2549 let from_b0: Vec<_> = rebuild
2550 .edges
2551 .edges_from(b0)
2552 .into_iter()
2553 .filter(|e| e.target == a0)
2554 .collect();
2555 assert!(
2556 from_b0.is_empty(),
2557 "edge b0 -> a0 must be gone after rebuild.remove_file(file_a)"
2558 );
2559 }
2560
2561 #[test]
2562 fn rebuild_remove_file_drops_file_registry_entry() {
2563 let (mut rebuild, file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2564
2565 assert!(rebuild.files.resolve(file_a).is_some());
2566 assert!(!rebuild.files.nodes_for_file(file_a).is_empty());
2567
2568 let _ = rebuild.remove_file(file_a);
2569
2570 assert!(
2571 rebuild.files.resolve(file_a).is_none(),
2572 "rebuild FileRegistry entry must be gone"
2573 );
2574 assert!(
2575 rebuild.files.nodes_for_file(file_a).is_empty(),
2576 "rebuild per-file bucket for file_a must be drained"
2577 );
2578 }
2579
2580 #[test]
2581 fn rebuild_remove_file_is_idempotent_on_unknown_file() {
2582 use crate::graph::unified::file::FileId;
2583 let (mut rebuild, _file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2584
2585 let nodes_before = rebuild.nodes.len();
2586 let delta_fwd_before = rebuild.edges.forward().delta().len();
2587 let delta_rev_before = rebuild.edges.reverse().delta().len();
2588 let tombstones_before = rebuild.pending_tombstone_count();
2589
2590 let bogus = FileId::new(9999);
2591 let returned = rebuild.remove_file(bogus);
2592 assert!(returned.is_empty());
2593
2594 assert_eq!(rebuild.nodes.len(), nodes_before);
2595 assert_eq!(rebuild.edges.forward().delta().len(), delta_fwd_before);
2596 assert_eq!(rebuild.edges.reverse().delta().len(), delta_rev_before);
2597 assert_eq!(rebuild.pending_tombstone_count(), tombstones_before);
2598 }
2599
2600 #[test]
2601 fn rebuild_remove_file_stages_tombstones_for_finalize_sweep() {
2602 // The whole point of RebuildGraph::remove_file deferring the
2603 // K.A/K.B sweep to finalize() is that the sweep happens exactly
2604 // once against the union of every file's tombstones. Drive this
2605 // end-to-end: remove both files, finalize, and assert the
2606 // publish-boundary invariants hold.
2607 let (mut rebuild, file_a, file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(2);
2608
2609 let _ = rebuild.remove_file(file_a);
2610 let _ = rebuild.remove_file(file_b);
2611
2612 // Pending tombstones: union of both files' nodes.
2613 assert_eq!(
2614 rebuild.pending_tombstone_count(),
2615 file_a_nodes.len() + file_b_nodes.len()
2616 );
2617
2618 // Pre-finalize residue check against the rebuild-local state
2619 // is expected to pass: every NodeIdBearing surface on the
2620 // rebuild must already be clean of the tombstoned IDs (via the
2621 // immediate arena + edge tombstoning in step 1–2 of
2622 // remove_file) — NO, the NodeIdBearing K.A/K.B surfaces beyond
2623 // NodeArena/edges are NOT touched before finalize. The
2624 // assert_no_tombstone_residue helper on RebuildGraph walks
2625 // every surface, so it will legitimately find tombstones in
2626 // the auxiliary indices + metadata stores until finalize runs.
2627 //
2628 // So: do NOT run the pre-finalize residue check here. Instead,
2629 // confirm finalize runs successfully and the assembled
2630 // CodeGraph passes the publish-boundary invariants against
2631 // the drained set — that is the real post-condition.
2632 let finalized = rebuild.finalize().expect("finalize must succeed");
2633
2634 // Every surface on the finalized CodeGraph must be clean of the
2635 // tombstoned ids. Use the publish-boundary residue helper
2636 // against an empty dead set (finalize's own step-14 call already
2637 // covered the drained set; we re-verify the bijection for
2638 // paranoia).
2639 crate::graph::unified::publish::assert_publish_bijection(&finalized);
2640 // Arena is empty (every seeded node was tombstoned).
2641 assert_eq!(finalized.nodes().len(), 0);
2642 // No file registration survives.
2643 assert!(finalized.files().resolve(file_a).is_none());
2644 assert!(finalized.files().resolve(file_b).is_none());
2645 }
2646
2647 #[test]
2648 fn rebuild_remove_file_repeated_calls_are_idempotent() {
2649 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2650
2651 let first = rebuild.remove_file(file_a);
2652 assert_eq!(first.len(), file_a_nodes.len());
2653 let staged = rebuild.pending_tombstone_count();
2654
2655 // Second call: bucket is already drained, NodeArena::remove
2656 // ignores stale generations, and the tombstones set should not
2657 // grow. The immediate tombstone side effects are idempotent.
2658 let second = rebuild.remove_file(file_a);
2659 assert!(second.is_empty());
2660 assert_eq!(rebuild.pending_tombstone_count(), staged);
2661 }
2662
2663 #[test]
2664 fn rebuild_remove_file_clears_file_segments_entry() {
2665 // Iter-1 Codex review fix mirror of the CodeGraph-side test
2666 // in `concurrent/graph.rs`. Seed a segment for file A, invoke
2667 // `RebuildGraph::remove_file`, and assert both the
2668 // rebuild-local `file_segments` and the finalized `CodeGraph`'s
2669 // `file_segments` table carry no entry for file A. The second
2670 // half of the check is critical: `finalize()` step 12 publishes
2671 // `self.file_segments` verbatim, so a missing clear here would
2672 // leak the stale range into the publishable graph.
2673 use std::path::Path;
2674
2675 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2676
2677 // Seed a segment for file A. Fish out the span from the
2678 // allocated NodeIds to mirror the shape `phase3_parallel_commit`
2679 // produces.
2680 let first_index = file_a_nodes
2681 .iter()
2682 .map(|n| n.index())
2683 .min()
2684 .expect("per_file = 3");
2685 let last_index = file_a_nodes
2686 .iter()
2687 .map(|n| n.index())
2688 .max()
2689 .expect("per_file = 3");
2690 let slot_count = last_index - first_index + 1;
2691 rebuild
2692 .file_segments
2693 .record_range(file_a, first_index, slot_count);
2694 assert!(
2695 rebuild.file_segments().get(file_a).is_some(),
2696 "seeded segment for file_a must be present before remove_file"
2697 );
2698
2699 // Act.
2700 let _ = rebuild.remove_file(file_a);
2701
2702 // Rebuild-local post-condition.
2703 assert!(
2704 rebuild.file_segments().get(file_a).is_none(),
2705 "RebuildGraph::remove_file must clear the file_segments entry"
2706 );
2707
2708 // Finalize post-condition: the published CodeGraph must also
2709 // carry no entry for file_a.
2710 let finalized = rebuild.finalize().expect("finalize must succeed");
2711 assert!(
2712 finalized.file_segments().get(file_a).is_none(),
2713 "finalize must publish a CodeGraph with no stale file_segments for file_a"
2714 );
2715
2716 // Defensive suppression: the Path import is used only by the
2717 // adjacent `seed_two_file_rebuild` helper when the signature
2718 // is exercised — the let-binding below keeps this test
2719 // self-contained even if the helper's surface changes later.
2720 let _ = Path::new("");
2721 }
2722}