sqry-core/src/graph/unified/rebuild/rebuild_graph.rs
1//! [A2 §H] `RebuildGraph` + `clone_for_rebuild` + `finalize()` — Gate 0c.
2//!
3//! **This module is feature-gated.** Every item lives behind
4//! `#[cfg(feature = "rebuild-internals")]`; only `sqry-daemon` is
5//! whitelisted to enable the feature (see
6//! `sqry-core/tests/rebuild_internals_whitelist.rs` and the plan at
7//! `docs/superpowers/plans/2026-03-19-sqryd-daemon.md` §H "Placement
8//! and feature gate").
9//!
10//! # What this module delivers
11//!
12//! Per the plan's A2 §H (pre-implementation Gate 0c, required before
13//! any `clone_for_rebuild` caller in Task 4 Step 4 can exist):
14//!
15//! 1. [`sqry_graph_fields!`] — a **single source of truth** macro that
16//! enumerates every field on [`super::super::concurrent::CodeGraph`].
17//! It is invoked in three places by the rest of this module:
18//! - `sqry_graph_fields!(@decl_rebuild)` declares the
19//! [`RebuildGraph`] struct.
20//! - `sqry_graph_fields!(@clone_inner from self)` is expanded
21//! inside [`CodeGraph::clone_for_rebuild`]; the destructure
//!       `let CodeGraph { /* every field */ } = self;` is exhaustive
//!       (there is no `..` rest pattern), so adding a field to
//!       [`CodeGraph`] without adding it to the macro's field list is
//!       a hard `E0027` compile error.
25//! - `sqry_graph_fields!(@field_names)` emits a `&[&str]` of
26//! every field name, used by the Gate 0d bijection check
27//! (future) and by diagnostics.
28//! 2. [`CodeGraph::clone_for_rebuild`] — deep-copies every Arc-wrapped
29//! field into an owned, rebuild-local [`RebuildGraph`]. The rebuild
30//! dispatcher (Task 4 Step 4) is the only caller.
31//! 3. [`RebuildGraph::finalize`] — the canonical 14-step sequence
32//! spelled out in §H lines 658–707. Every step is either an API
33//! call on an owned component (freeze interner, compact arena,
34//! compact edges, compact indices, compact metadata, compact file
35//! buckets, compact K-list extras), a drain/move of scratch state
36//! (drain tombstones), a derived-state reset (rebuild CSR), a
37//! per-language metadata update (confidence), a scalar bump (epoch),
38//! a struct assembly (`Arc::new` wrapping), or a debug invariant
39//! (bucket bijection + tombstone residue).
40//!
41//! # Why the macro is kind-tagged, not a single callback
42//!
43//! The plan's literal example invokes `sqry_graph_fields!(CodeGraph,
44//! RebuildGraph)` to generate both structs from one sibling macro
45//! call. That shape would require `CodeGraph` to be declared by the
46//! macro — but `CodeGraph` has dozens of `impl` blocks, Clone
47//! semantics, and accessor methods that already destructure the fields
48//! by name on stable Rust. Moving the declaration into a macro
49//! expansion would obscure every one of those sites.
50//!
51//! A callback-based idiom (`sqry_graph_fields!(my_cb!)`) was also
52//! considered; it worked for the struct declaration but collapsed
53//! under `macro_rules!` hygiene constraints for the destructure-and-
54//! construct clone path (the `self` keyword cannot be synthesised by
55//! a macro; passing identifiers across a callback boundary requires
56//! forwarding them through an `:expr` metavar, which loses the
57//! token-level decomposition needed for Arc-vs-scalar dispatch).
58//!
59//! The **kind-tagged single macro** settled on here expands inline at
60//! the call site — `sqry_graph_fields!(@clone_inner from self)` —
61//! so the destructure runs in the caller's hygiene context, the
62//! `self` keyword is the caller's `self`, and the per-field Arc /
63//! scalar / owned dispatch is baked into the macro body without a
64//! secondary callback hop.
65//!
66//! The single-source-of-truth guarantee is the same: adding a field
67//! requires touching (a) the `CodeGraph` struct in
68//! `concurrent/graph.rs`, (b) the field list inside this macro, and
//! (c) the step in `finalize()` that compacts it. Missing either (a)
//! or (b) makes `clone_for_rebuild_inner` fail to compile.
71//!
72//! # Release-build posture
73//!
74//! The debug bucket-bijection and tombstone-residue checks at steps 13
75//! and 14 are gated on `cfg(any(debug_assertions, test))`. Release
76//! builds pay zero overhead. The §E equivalence harness in CI is the
77//! release-time drift backstop (plan §F "Release build semantics").
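//!
//! # End-to-end sketch (dispatcher side)
//!
//! A minimal sketch of the Task 4 Step 4 flow this module is built for;
//! `graph_swap` (an `ArcSwap<CodeGraph>` handle), `changed`, and the
//! re-parse step are illustrative dispatcher-side stand-ins, not APIs of
//! this module:
//!
//! ```ignore
//! // Snapshot the published graph and deep-copy it into owned state.
//! let snapshot = graph_swap.load_full(); // Arc<CodeGraph>
//! let mut rebuild = snapshot.clone_for_rebuild();
//! // Tombstone every file touched by this rebuild cycle.
//! for file_id in changed {
//!     let _removed = rebuild.remove_file(file_id);
//! }
//! // ... re-parse and re-commit the changed files into `rebuild` ...
//! // Run the 14-step finalize contract, then publish.
//! let new_graph = rebuild.finalize()?;
//! graph_swap.store(std::sync::Arc::new(new_graph));
//! ```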
78
79// Module-level `#[cfg(feature = "rebuild-internals")]` lives on the
80// `pub mod rebuild_graph;` declaration in the parent `mod.rs`; duplicating
81// it here trips the `duplicated_attributes` clippy lint.
82#![allow(clippy::too_many_lines)]
83
84use std::collections::{HashMap, HashSet};
85
86use crate::confidence::ConfidenceMetadata;
87use crate::graph::error::GraphResult;
88use crate::graph::unified::bind::alias::AliasTable;
89use crate::graph::unified::bind::scope::ScopeArena;
90use crate::graph::unified::bind::scope::provenance::ScopeProvenanceStore;
91use crate::graph::unified::bind::shadow::ShadowTable;
92use crate::graph::unified::concurrent::CodeGraph;
93use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
94use crate::graph::unified::node::NodeId;
95use crate::graph::unified::storage::arena::NodeArena;
96use crate::graph::unified::storage::edge_provenance::EdgeProvenanceStore;
97use crate::graph::unified::storage::indices::AuxiliaryIndices;
98use crate::graph::unified::storage::interner::StringInterner;
99use crate::graph::unified::storage::metadata::NodeMetadataStore;
100use crate::graph::unified::storage::node_provenance::NodeProvenanceStore;
101use crate::graph::unified::storage::registry::FileRegistry;
102use crate::graph::unified::storage::segment::FileSegmentTable;
103
104use super::coverage::NodeIdBearing;
105
106// =====================================================================
107// Single source of truth: field list
108// =====================================================================
109
110/// Single source of truth for the [`CodeGraph`] field list.
111///
112/// This macro takes a **kind-tag** selector (`@decl_rebuild`,
113/// `@clone_inner from ...`, `@field_names`) and emits a tailored
114/// expansion for each consumer. The field-list rows live exactly once,
115/// inside this macro body, so any new field is added in one place:
116/// here.
117///
118/// # Adding / renaming / removing a field on [`CodeGraph`]
119///
120/// 1. Edit the `CodeGraph` struct in
121/// `sqry-core/src/graph/unified/concurrent/graph.rs` (declaration
122/// order matters — match the order here).
123/// 2. Edit the field list below. The `@decl_rebuild` arm auto-mirrors
124/// into `RebuildGraph`; the `@clone_inner` arm auto-mirrors into
125/// `clone_for_rebuild_inner`.
126/// 3. Extend `RebuildGraph::finalize()` with the step that compacts
127/// the new field (and, if it carries `NodeId`s, add a K.A/K.B row
128/// + `NodeIdBearing` impl in `super::coverage`).
129///
/// Missing step (1) or (2) is a hard compile error (`E0027`/`E0026`) on
/// the exhaustive `CodeGraph` destructure inside the `@clone_inner`
132/// arm. Step (3) is enforced by the §F tombstone-residue check
133/// (Gate 0d) and by the §E incremental-vs-full equivalence harness.
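///
/// # Invocation forms (sketch)
///
/// The three consumers in this module invoke the macro as shown below.
/// The `@field_names` length assertion is an illustrative test-side
/// probe, assuming the field list keeps its current sixteen entries:
///
/// ```ignore
/// // Declares `RebuildGraph` (item position, this module):
/// sqry_graph_fields!(@decl_rebuild);
///
/// // Expands to the destructure + construction inside
/// // `CodeGraph::clone_for_rebuild_inner`:
/// // sqry_graph_fields!(@clone_inner from self)
///
/// // Test-side coverage probe over the emitted name list:
/// let names: &[&str] = sqry_graph_fields!(@field_names);
/// assert_eq!(names.len(), 16);
/// ```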
134#[macro_export]
135macro_rules! sqry_graph_fields {
136 // -------------------------------------------------------------
137 // Arm @decl_rebuild: emit the `RebuildGraph` struct declaration.
138 // -------------------------------------------------------------
139 (@decl_rebuild) => {
140 /// Owned, rebuild-local mirror of [`CodeGraph`].
141 ///
142 /// Each field stores a value type rather than an [`Arc`], so
143 /// the rebuild task can mutate freely without touching the
144 /// live published graph. The only path to produce a
145 /// publishable [`CodeGraph`] from a [`RebuildGraph`] is
146 /// [`RebuildGraph::finalize`]; the trybuild fixture at
147 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
148 /// locks that invariant in.
149 pub struct RebuildGraph {
150 pub(crate) nodes: NodeArena,
151 pub(crate) edges: BidirectionalEdgeStore,
152 pub(crate) strings: StringInterner,
153 pub(crate) files: FileRegistry,
154 pub(crate) indices: AuxiliaryIndices,
155 pub(crate) macro_metadata: NodeMetadataStore,
156 pub(crate) node_provenance: NodeProvenanceStore,
157 pub(crate) edge_provenance: EdgeProvenanceStore,
158 pub(crate) fact_epoch: u64,
159 pub(crate) epoch: u64,
160 pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
161 pub(crate) scope_arena: ScopeArena,
162 pub(crate) alias_table: AliasTable,
163 pub(crate) shadow_table: ShadowTable,
164 pub(crate) scope_provenance_store: ScopeProvenanceStore,
165 pub(crate) file_segments: FileSegmentTable,
166 /// Active tombstone set during finalize steps 2–7.
167 /// Populated by `RebuildGraph::remove_file` /
 /// `RebuildGraph::tombstone` and
169 /// drained into [`drained_tombstones`] at step 8.
170 pub(crate) tombstones: HashSet<NodeId>,
171 /// Snapshot of [`tombstones`] taken at finalize step 8,
172 /// kept for the debug residue check in step 14.
173 pub(crate) drained_tombstones: HashSet<NodeId>,
174 }
175 };
176
177 // -------------------------------------------------------------
178 // Arm @clone_inner: emit the body of `clone_for_rebuild_inner`.
179 //
180 // The caller passes their `self` binding as `$this:expr` so
181 // macro hygiene preserves the method-local reference. The
182 // destructure is exhaustive — adding a `CodeGraph` field without
 // also adding it here is a hard `E0027` compile error. Removing
 // a row here without removing the `CodeGraph` field is likewise a
 // hard `E0027` error: the pattern no longer mentions that field.
186 // -------------------------------------------------------------
187 (@clone_inner from $this:expr) => {{
188 let CodeGraph {
189 nodes,
190 edges,
191 strings,
192 files,
193 indices,
194 macro_metadata,
195 node_provenance,
196 edge_provenance,
197 fact_epoch,
198 epoch,
199 confidence,
200 scope_arena,
201 alias_table,
202 shadow_table,
203 scope_provenance_store,
204 file_segments,
205 } = $this;
206 RebuildGraph {
207 nodes: (**nodes).clone(),
208 edges: (**edges).clone(),
209 strings: (**strings).clone(),
210 files: (**files).clone(),
211 indices: (**indices).clone(),
212 macro_metadata: (**macro_metadata).clone(),
213 node_provenance: (**node_provenance).clone(),
214 edge_provenance: (**edge_provenance).clone(),
215 fact_epoch: *fact_epoch,
216 epoch: *epoch,
217 confidence: confidence.clone(),
218 scope_arena: (**scope_arena).clone(),
219 alias_table: (**alias_table).clone(),
220 shadow_table: (**shadow_table).clone(),
221 scope_provenance_store: (**scope_provenance_store).clone(),
222 file_segments: (**file_segments).clone(),
223 tombstones: ::std::collections::HashSet::new(),
224 drained_tombstones: ::std::collections::HashSet::new(),
225 }
226 }};
227
228 // -------------------------------------------------------------
229 // Arm @field_names: emit a comma-separated list of every field
230 // name. Used by tests that want to assert coverage of field names
231 // against a declared list without re-parsing Rust source.
232 // -------------------------------------------------------------
233 (@field_names) => {
234 &[
235 "nodes",
236 "edges",
237 "strings",
238 "files",
239 "indices",
240 "macro_metadata",
241 "node_provenance",
242 "edge_provenance",
243 "fact_epoch",
244 "epoch",
245 "confidence",
246 "scope_arena",
247 "alias_table",
248 "shadow_table",
249 "scope_provenance_store",
250 "file_segments",
251 ]
252 };
253}
254
255// =====================================================================
256// RebuildGraph struct declaration (macro-driven)
257// =====================================================================
258
259sqry_graph_fields!(@decl_rebuild);
260
261// =====================================================================
262// clone_for_rebuild: CodeGraph -> RebuildGraph
263// =====================================================================
264
265impl CodeGraph {
266 /// Produce a fresh [`RebuildGraph`] from this graph's current
267 /// committed state, deep-cloning every Arc-wrapped component so the
268 /// returned value is decoupled from future mutations to `self`.
269 ///
270 /// The exhaustive destructure inside the `@clone_inner` arm of
271 /// [`sqry_graph_fields!`] guarantees that every field on
272 /// [`CodeGraph`] is mirrored into the returned [`RebuildGraph`].
273 /// Adding a field to [`CodeGraph`] without also adding it to the
274 /// field list inside `sqry_graph_fields!` is a hard `E0027`
275 /// compile error on the `let CodeGraph { .. } = self;` destructure.
276 ///
277 /// # When to call this
278 ///
279 /// Only the incremental-rebuild dispatcher (Task 4 Step 4) calls
280 /// `clone_for_rebuild`, on the rebuild task's background tokio
281 /// context, against an `Arc<CodeGraph>` freshly obtained via
282 /// `ArcSwap::load_full()`. The Arc's refcount is 1 in the common
283 /// case, so the underlying deep clones amount to `Arc::get_mut`-
284 /// equivalent cost on each component.
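 ///
 /// A minimal sketch of that call pattern; `graph_swap` stands in for
 /// the dispatcher's `ArcSwap<CodeGraph>` handle and is not part of
 /// this module's API:
 ///
 /// ```ignore
 /// let snapshot = graph_swap.load_full();          // Arc<CodeGraph>
 /// let mut rebuild = snapshot.clone_for_rebuild(); // owned, decoupled
 /// ```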
285 ///
286 /// # Performance budget (A2 §H, line 734)
287 ///
288 /// On a 384k-node / 1.3M-edge reference graph, this call must
289 /// complete in < 50 ms. The daemon's rebuild-latency benchmark
290 /// tracks the budget; warning threshold 50 ms, hard record
291 /// threshold 200 ms. Exceeding the warning logs but does not fail
292 /// the rebuild.
293 #[must_use]
294 pub fn clone_for_rebuild(&self) -> RebuildGraph {
295 Self::clone_for_rebuild_inner(self)
296 }
297
298 /// Inner implementation, kept separate so the public entrypoint is
299 /// a single-line shim that also makes the exhaustive destructure
300 /// visible at the top of the rebuild surface.
301 ///
302 /// The `@clone_inner from self` macro arm emits the destructure +
303 /// struct-construction inline; passing `self` as an `:expr`
304 /// metavar preserves call-site hygiene so the macro-introduced
305 /// field bindings (`nodes`, `edges`, ...) are resolvable against
306 /// the `self` binding in this method.
307 fn clone_for_rebuild_inner(&self) -> RebuildGraph {
308 sqry_graph_fields!(@clone_inner from self)
309 }
310}
311
312// =====================================================================
313// finalize(): RebuildGraph -> CodeGraph — the 14-step contract
314// =====================================================================
315
316impl RebuildGraph {
317 /// Consume this [`RebuildGraph`] and assemble a publishable
318 /// [`CodeGraph`].
319 ///
320 /// This is the **only** safe path from [`RebuildGraph`] back to
321 /// [`CodeGraph`]. No public API converts a [`RebuildGraph`] into
322 /// an [`Arc<CodeGraph>`] any other way, and no
323 /// `From<RebuildGraph> for CodeGraph` impl exists — the trybuild
324 /// fixture at
325 /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
326 /// locks that property in.
327 ///
328 /// # The 14 ordered steps (plan §H lines 658–707)
329 ///
330 /// Each numbered comment below traces the matching plan step. In
331 /// debug / test builds, steps 13 and 14 also execute the §F
332 /// bijection and tombstone-residue checks; release builds compile
333 /// them out.
334 ///
335 /// Steps 2–7 uniformly invoke
336 /// [`super::coverage::NodeIdBearing::retain_nodes`] with the
337 /// closure `|nid| !self.tombstones.contains(&nid)`, so adding a new
338 /// K.A or K.B row for a future `NodeId`-bearing container
339 /// automatically extends compaction coverage once the new row's
340 /// `retain_nodes` call is appended to step 7.
341 ///
342 /// # Errors
343 ///
 /// Returns `Err(GraphBuilderError::Internal{..})` when a compaction
 /// primitive fails. The only fallible primitives today are the step 9
 /// CSR constructions (`build_compacted_csr`, forward and reverse);
 /// every other step is infallible. The `Result` return also leaves
 /// room for future fallible compaction steps without a signature
 /// change.
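 ///
 /// # Example (sketch)
 ///
 /// A minimal Gate 0c-style exercise of the contract, mirroring the
 /// unit tests at the bottom of this module; `graph` and `victim` are
 /// assumed to come from whatever seeded the rebuild:
 ///
 /// ```ignore
 /// let mut rebuild = graph.clone_for_rebuild();
 /// rebuild.tombstone(victim);
 /// let published = rebuild.finalize()?;
 /// assert!(published.nodes().get(victim).is_none());
 /// assert_eq!(published.epoch(), graph.epoch() + 1);
 /// ```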
349 pub fn finalize(mut self) -> GraphResult<CodeGraph> {
350 // ----------------------------------------------------------------
351 // Step 1 — Freeze the rebuild's interner.
352 //
353 // The plan §H describes step 1 as `new_strings =
354 // self.string_builder.freeze()`. In this codebase the
355 // rebuild-local interner is a [`StringInterner`] (no separate
356 // "builder" type), so the concrete freeze operation is:
357 //
358 // (a) canonicalise the interner by running
359 // [`StringInterner::build_dedup_table`]. This guarantees:
360 // * `lookup_stale == false` (required for all read-path
361 // accessors); any in-flight bulk-alloc residue from a
362 // prior failed rebuild is healed.
363 // * Every string value is backed by exactly one
364 // canonical slot — no duplicate arcs remain. The
365 // `ref_counts` of duplicate slots are folded into
366 // their canonical slot. `StringId` values held by
367 // live `NodeEntry`s remain valid because this pass
368 // only collapses slots that are *not* referenced from
369 // live nodes (nothing in this pass renames already
370 // live slots).
371 // (b) rewrite every live `NodeEntry` through the remap table
372 // produced by (a) so any node whose fields happen to
373 // point at a now-collapsed duplicate slot is rewired to
374 // the canonical slot. This preserves the post-freeze
375 // invariant "every live `NodeEntry` references only
376 // canonical `StringId`s" even if earlier rebuild steps
377 // produced duplicate slots.
378 // (c) prune unreferenced slots with
379 // [`StringInterner::recycle_unreferenced`] so the frozen
380 // interner carries only strings the final graph actually
381 // needs. This is the step that drives
382 // `interner_live_ratio < interner_compaction_threshold`
383 // down over time (plan §H line 713) so the next
384 // housekeeping rebuild observes an accurate live ratio.
385 //
386 // After this block runs, `self.strings` is in a fully
387 // canonical, shared-immutable-ready state. Step 12's
388 // `Arc::new(new_strings)` is then purely the ownership-to-
389 // shared transition; the semantic freeze itself has already
390 // happened here.
391 // ----------------------------------------------------------------
392 let string_remap = self.strings.build_dedup_table();
393 if !string_remap.is_empty() {
394 // Rewrite every live node's interned-string fields through
395 // the remap so freshly-collapsed duplicate slots are not
396 // reachable from any live `NodeEntry`. This is required
397 // for (c) below to be safe to call: `recycle_unreferenced`
398 // only frees slots with `ref_count == 0`, so the refcount
399 // bookkeeping that `build_dedup_table` consolidated into
400 // the canonical slot must be honoured by node-level fields
401 // as well.
402 rewrite_node_entries_through_remap(&mut self.nodes, &string_remap);
403 // Iter-2 B1 (verbatim): `AuxiliaryIndices::name_index` /
404 // `qualified_name_index` are keyed by `StringId`. If
405 // `build_dedup_table()` collapses a key's slot and
406 // `recycle_unreferenced` subsequently frees it, the bucket
407 // keys would dangle. Remap every `StringId`-backed holder
408 // on the rebuild through the same dedup table before the
409 // recycle pass runs, merging any buckets that collapse onto
410 // the same canonical key.
411 //
412 // All remap-capable stores are listed here exhaustively.
413 // Extending this block when a new `CodeGraph` field gains a
414 // `StringId` payload is a code-owner obligation tied to the
415 // `sqry_graph_fields!` macro (plan §H "Placement and feature
416 // gate"). Today, the StringId-bearing surfaces on the
417 // publish-visible graph are:
418 //
419 // * `AuxiliaryIndices.name_index` / `qualified_name_index`
420 // (BTreeMap keys — collapse on canonicalisation)
421 // * `FileRegistry` — `FileEntry.source_uri: Option<StringId>`
422 // per slot
423 // * `AliasTable` — `AliasEntry.from_symbol` /
424 // `to_symbol` per entry; the `(scope, from_symbol)`
425 // sort key is re-established after rewrite
426 // * `ShadowTable` — `ShadowEntry.symbol` per entry; the
427 // `(scope, symbol, byte_offset)` sort key and the
428 // `chains` range index are rebuilt after rewrite
429 //
430 // `NodeArena` (above) is the fifth surface; it is handled
431 // inline because it is the single largest consumer and
432 // already has a purpose-built helper.
433 //
434 // Iter-3 B1 (verbatim): `BidirectionalEdgeStore` holds
435 // `EdgeKind` instances whose variants — `Imports{alias}`,
436 // `Exports{alias}`, `TypeOf{name}`, `TraitMethodBinding
437 // {trait_name, impl_type}`, `HttpRequest{url}`,
438 // `GrpcCall{service, method}`, `DbQuery{table}`,
439 // `TableRead{table_name, schema}`, `TableWrite{table_name,
440 // schema}`, `TriggeredBy{trigger_name, schema}`,
441 // `MessageQueue{protocol::Other(_), topic}`,
442 // `WebSocket{event}`, `GraphQLOperation{operation}`,
443 // `ProcessExec{command}`, `FileIpc{path_pattern}`,
444 // `ProtocolCall{protocol, metadata}` — carry
445 // `StringId` payloads inside both the steady-state CSR
446 // (`CsrGraph::edge_kind`) and the mutable delta
447 // (`DeltaBuffer` `DeltaEdge::kind`), in **both** the
448 // forward and reverse stores. Before this iter-4 fix,
449 // step 1 left those payloads un-remapped — so after
450 // `recycle_unreferenced` below, those edges would have
451 // dangled onto freed interner slots. The full-build
452 // pipeline recognises this surface explicitly at
453 // `build/entrypoint.rs` (Phase 4b) +
454 // `build/parallel_commit.rs::phase4_apply_global_remap`;
455 // the rebuild pipeline must do the same against
456 // committed storage because, unlike Phase 4b, the edges
457 // are no longer a `Vec<Vec<PendingEdge>>`.
458 //
459 // `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
460 // is the sixth surface on this list. The exhaustive match
461 // on `EdgeKind` lives in
462 // `parallel_commit::remap_edge_kind_string_ids` so adding a
463 // new `StringId`-bearing variant becomes a single source of
464 // truth: a compile error there forces both pipelines to
465 // rewrite it.
466 //
467 // Stores whose data is keyed by `NodeId`/`FileId`/`EdgeId`
468 // only (metadata keyed on NodeId, node / edge provenance,
469 // scope arena indexed by ScopeId, scope provenance,
470 // file-segments keyed on FileId/EdgeId) do not hold
471 // `StringId` payloads and are out of scope for this
472 // rewrite. Any future field that lifts a `StringId` onto
473 // `CodeGraph` must extend this list in the same commit
474 // that declares it, matching the A2 §K discipline.
475 self.indices.rewrite_string_ids_through_remap(&string_remap);
476 self.files.rewrite_string_ids_through_remap(&string_remap);
477 self.alias_table
478 .rewrite_string_ids_through_remap(&string_remap);
479 self.shadow_table
480 .rewrite_string_ids_through_remap(&string_remap);
481 // Iter-4 K.B1 fix: rewrite `StringId` payloads carried by
482 // committed `EdgeKind`s in both forward and reverse stores,
483 // across both CSR and delta tiers. Must run before
484 // `recycle_unreferenced` below so freed slots stay unreferenced.
485 self.edges
486 .rewrite_edge_kind_string_ids_through_remap(&string_remap);
487 }
488 self.strings.recycle_unreferenced();
489
490 // ----------------------------------------------------------------
491 // Step 2 — Compact NodeArena.
492 //
493 // K.A1 row: `NodeArena::retain_nodes` drops every occupied slot
494 // whose NodeId fails the predicate, advancing the slot's
495 // generation so lingering NodeId handles become stale.
496 // ----------------------------------------------------------------
497 {
498 let tombstones = &self.tombstones;
499 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
500 Box::new(move |nid| !tombstones.contains(&nid));
501 self.nodes.retain_nodes(&*predicate);
502 }
503
504 // ----------------------------------------------------------------
505 // Step 3 — Compact BidirectionalEdgeStore (forward + reverse).
506 //
507 // K.A2 + K.A3 rows: `retain_nodes` drops every delta edge with
508 // a tombstoned endpoint in either direction. CSR compaction is
509 // deferred to step 9 (CSR is derived state, rebuilt not mutated).
510 // ----------------------------------------------------------------
511 {
512 let tombstones = &self.tombstones;
513 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
514 Box::new(move |nid| !tombstones.contains(&nid));
515 self.edges.retain_nodes(&*predicate);
516 }
517
518 // ----------------------------------------------------------------
519 // Step 4 — Compact AuxiliaryIndices.
520 //
521 // K.A4–K.A7 rows (kind / name / qualified-name / file indices):
522 // a single `AuxiliaryIndices::retain_nodes` call visits every
523 // inner bucket.
524 // ----------------------------------------------------------------
525 {
526 let tombstones = &self.tombstones;
527 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
528 Box::new(move |nid| !tombstones.contains(&nid));
529 self.indices.retain_nodes(&*predicate);
530 }
531
532 // ----------------------------------------------------------------
533 // Step 5 — Compact NodeMetadataStore.
534 //
535 // K.A8 row: macro / classpath per-node metadata keyed by the
536 // full `(index, generation)` pair.
537 // ----------------------------------------------------------------
538 {
539 let tombstones = &self.tombstones;
540 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
541 Box::new(move |nid| !tombstones.contains(&nid));
542 self.macro_metadata.retain_nodes(&*predicate);
543 }
544
545 // ----------------------------------------------------------------
546 // Step 6 — Compact FileRegistry per-file buckets.
547 //
548 // K.B1 row. Iter-2 B2 (verbatim): this step used to be a
549 // no-op scheduled against a future base-plan Step 1. Pulling
550 // `per_file_nodes: HashMap<FileId, Vec<NodeId>>` forward into
551 // Gate 0c retires the no-op — every `NodeId` the rebuild's
552 // parallel-parse pass committed is already bucketed via
553 // `FileRegistry::record_node`, so this step now does real work:
554 //
555 // * drop every `NodeId` whose arena slot was tombstoned in
556 // step 2 (or earlier) via the shared predicate
557 // * dedup each surviving bucket
558 // * drop buckets that collapse to empty
559 //
560 // The §F.1 bucket-bijection check at step 13 consumes the
561 // result of this compaction, so a mis-applied predicate here
562 // surfaces as a panic at the publish boundary in debug builds.
563 // ----------------------------------------------------------------
564 {
565 let tombstones = &self.tombstones;
566 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
567 Box::new(move |nid| !tombstones.contains(&nid));
568 self.files.retain_nodes(&*predicate);
569 }
570
571 // ----------------------------------------------------------------
572 // Step 7 — Compact K-list extras.
573 //
574 // Rows K.A10 (node_provenance), K.A11 (scope_arena),
575 // K.A12 (alias_table), K.A13 (shadow_table). Each is a direct
576 // NodeIdBearing::retain_nodes call.
577 //
578 // When a future task lifts a new NodeId-bearing structure onto
579 // `CodeGraph`, extend this step with one additional
580 // `retain_nodes` call and add a K.A/K.B row to
581 // `super::coverage` per the plan §K contract.
582 // ----------------------------------------------------------------
583 {
584 let tombstones = &self.tombstones;
585 let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
586 Box::new(move |nid| !tombstones.contains(&nid));
587 self.node_provenance.retain_nodes(&*predicate);
588 self.scope_arena.retain_nodes(&*predicate);
589 self.alias_table.retain_nodes(&*predicate);
590 self.shadow_table.retain_nodes(&*predicate);
591 }
592
593 // ----------------------------------------------------------------
594 // Step 8 — Drain tombstones into `drained_tombstones`.
595 //
596 // The residue check at step 14 asserts that the finalized
597 // `CodeGraph` does not contain any of these NodeIds in any
598 // NodeId-bearing structure. Keep the drained set until the
599 // check completes; then it is dropped with `self`.
600 // ----------------------------------------------------------------
601 self.drained_tombstones = std::mem::take(&mut self.tombstones);
602
603 // ----------------------------------------------------------------
604 // Step 9 — Rebuild CSR adjacency (both directions).
605 //
606 // CSR is derived state (K.A9 — "CSR adjacency is derived state;
607 // rebuilt from compacted edges — never mutated in place"). After
608 // step 3 filtered the delta tier to live-only endpoints, the
609 // correct operation is to *construct* a fresh CSR from the
610 // compacted delta snapshot and *install* it — not merely drop
611 // the stale cache. This matches plan §H lines 684–691 (the
612 // committed-then-queried graph has a CSR from the first read).
613 //
614 // Build path: `compaction::snapshot_edges` → compaction merges
615 // in-memory delta (no tombstones left after step 3) →
616 // `build_compacted_csr` constructs an immutable `CsrGraph` →
617 // `swap_csrs_and_clear_deltas` installs both directions and
618 // clears the now-absorbed delta buffers. The node_count is the
619 // post-compaction slot count of the arena (not the live count —
620 // CSR uses dense slot indexing; vacant slots appear as nodes
621 // with zero out-edges).
622 //
623 // The plan's example spells this `self.edges.rebuild_csr()` as
624 // shorthand for this sequence; we expand it to concrete calls
625 // so reviewers can audit each primitive. The post-condition is
626 // identical to the plan's: after step 9 the `CodeGraph` about
627 // to be assembled has no CSR referencing a tombstoned NodeId,
628 // because the CSR was built from the already-filtered delta.
629 // ----------------------------------------------------------------
630 {
631 use crate::graph::unified::compaction::{
632 Direction, build_compacted_csr, snapshot_edges,
633 };
634 let node_count = self.nodes.slot_count();
635 // Snapshot the forward/reverse deltas (CSR is still stale
636 // at this point — `build_compacted_csr` uses the delta's
637 // live-only contents). Because step 3 tombstoned endpoints
638 // out of the delta already, the merged build sees only
639 // live edges.
640 let forward_snapshot = {
641 let forward = self.edges.forward();
642 snapshot_edges(&forward, node_count)
643 };
644 let reverse_snapshot = {
645 let reverse = self.edges.reverse();
646 snapshot_edges(&reverse, node_count)
647 };
648 // Build both directions in parallel (matches the pattern
649 // used by `build/entrypoint.rs`).
650 let (forward_csr, reverse_csr) = rayon::join(
651 || build_compacted_csr(&forward_snapshot, Direction::Forward),
652 || build_compacted_csr(&reverse_snapshot, Direction::Reverse),
653 );
654 let (forward_csr, _) =
655 forward_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
656 reason: format!("rebuild finalize step 9 (forward CSR build): {e}"),
657 })?;
658 let (reverse_csr, _) =
659 reverse_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
660 reason: format!("rebuild finalize step 9 (reverse CSR build): {e}"),
661 })?;
662 // Install both CSRs and clear the absorbed deltas.
663 self.edges
664 .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
665 }
666
667 // ----------------------------------------------------------------
668 // Step 10 — Per-language confidence update.
669 //
670 // Incremental rebuild must ensure the published confidence map
671 // matches the set of languages that actually have live nodes in
672 // the rebuild's graph. Languages whose only source files were
673 // all removed during this rebuild must not linger in the
674 // confidence surface — they would mislead MCP clients into
675 // believing analysis for that language is still available.
676 //
677 // The rebuild-local `FileRegistry` carries per-file `Language`
678 // tags; we enumerate them, collect the set of languages with at
679 // least one live file, and drop confidence entries for any
680 // language that no longer appears. Entries for still-present
681 // languages are preserved as-is (their `ConfidenceMetadata` is
682 // updated by the plugin-level confidence-ingestion path in
683 // `CodeGraph::merge_confidence`; this step only filters, it
684 // does not fabricate new samples).
685 //
686 // A language entry is preserved when (a) at least one live file
687 // is tagged with that language in the rebuild-local registry,
688 // OR (b) the confidence entry carries a non-default set of
689 // limitations / unavailable-features — that is analysis
690 // metadata the plugin layer deliberately recorded, and dropping
691 // it on a zero-file rebuild would be lossy. Both conditions are
692 // evaluated against the rebuild-local state that will be
693 // published by step 12.
694 // ----------------------------------------------------------------
695 {
696 use std::collections::BTreeSet;
697 let mut active_languages: BTreeSet<String> = BTreeSet::new();
698 for (_file_id, _path, maybe_language) in self.files.iter_with_language() {
699 if let Some(language) = maybe_language {
700 active_languages.insert(language.to_string());
701 }
702 }
703 self.confidence.retain(|language_key, meta| {
704 if active_languages.contains(language_key) {
705 true
706 } else {
707 // Preserve entries that encode deliberate
708 // plugin-recorded limitations even when no live
709 // files remain — those are analysis-state facts
710 // the daemon must not silently drop.
711 !meta.limitations.is_empty() || !meta.unavailable_features.is_empty()
712 }
713 });
714 }
715
716 // ----------------------------------------------------------------
717 // Step 11 — Epoch bump.
718 //
719 // `prior_epoch` captured at `clone_for_rebuild` time; +1 marks
720 // the publication boundary. Wrapping add matches the existing
721 // `CodeGraph::bump_epoch` behavior at
722 // `sqry-core/src/graph/unified/concurrent/graph.rs`.
723 // ----------------------------------------------------------------
724 let new_epoch = self.epoch.wrapping_add(1);
725
726 // ----------------------------------------------------------------
727 // Step 12 — Assemble the immutable `CodeGraph`.
728 //
729 // Every Arc-wrapped field is freshly wrapped from the
730 // rebuild-local owned value, so the new `CodeGraph` has no
731 // Arc-sharing relationship with the pre-rebuild snapshot. This
732 // is the point at which the rebuild-local interner becomes
733 // immutable (step 1's freeze).
734 // ----------------------------------------------------------------
735 let graph = CodeGraph::__assemble_from_rebuild_parts_internal(
736 self.nodes,
737 self.edges,
738 self.strings,
739 self.files,
740 self.indices,
741 self.macro_metadata,
742 self.node_provenance,
743 self.edge_provenance,
744 self.fact_epoch,
745 new_epoch,
746 self.confidence,
747 self.scope_arena,
748 self.alias_table,
749 self.shadow_table,
750 self.scope_provenance_store,
751 self.file_segments,
752 );
753
754 // ----------------------------------------------------------------
755 // Steps 13 + 14 — (debug) Publish-boundary invariants.
756 //
757 // Per plan §F.3 "single source of truth", both the §F.1 bucket
758 // bijection and the §F.2 tombstone-residue checks fire through
759 // the canonical `publish::assert_publish_invariants` helper.
760 // That helper is also called at the full-rebuild end
761 // (`build_unified_graph_inner`), by Task 6's
762 // `WorkspaceManager::publish_graph`, and by every §E harness
763 // iteration — so any invariant drift surfaces in all four
764 // places simultaneously, not in a subset.
765 //
766 // The §F.2 assertion is the **single site** against
767 // `self.drained_tombstones` (populated at step 8); we do not
768 // run the residue check anywhere else.
769 // ----------------------------------------------------------------
770 #[cfg(any(debug_assertions, test))]
771 crate::graph::unified::publish::assert_publish_invariants(&graph, &self.drained_tombstones);
772
773 Ok(graph)
774 }
775
776 /// Returns the number of NodeIds currently staged for tombstoning
 /// via [`RebuildGraph::tombstone`] or [`RebuildGraph::remove_file`].
778 /// Useful for Gate 0c tests that exercise finalize with an empty
779 /// or non-empty tombstone set.
780 #[must_use]
781 pub fn pending_tombstone_count(&self) -> usize {
782 self.tombstones.len()
783 }
784
785 /// Shared-reference accessor for the rebuild-local
786 /// [`NodeArena`]. Read-only; the writer is the rebuild dispatcher
787 /// which holds `&mut self`.
788 ///
789 /// Used by sqry-daemon's `WorkspaceManager` (Task 6) and by the
790 /// Task 4 Step 4 scale test to inspect arena state between
791 /// [`remove_file`](Self::remove_file) calls without going through
792 /// the pub(crate) field directly.
793 #[must_use]
794 pub fn nodes(&self) -> &NodeArena {
795 &self.nodes
796 }
797
798 /// Shared-reference accessor for the rebuild-local
799 /// [`FileRegistry`]. Read-only. The rebuild dispatcher writes via
800 /// `&mut self` on [`remove_file`](Self::remove_file) and
801 /// finalization.
802 #[must_use]
803 pub fn files(&self) -> &FileRegistry {
804 &self.files
805 }
806
807 /// Shared-reference accessor for the rebuild-local
808 /// [`FileSegmentTable`]. Read-only.
809 ///
810 /// `FileSegmentTable` is a plain `Vec<Option<FileSegment>>` with no
811 /// interior mutability (see
812 /// `sqry-core/src/graph/unified/storage/segment.rs`), so this
813 /// accessor is genuinely read-only. Writers ride through the
814 /// `&mut self` mutation path inside
815 /// [`remove_file`](Self::remove_file) and
816 /// [`finalize`](Self::finalize).
817 ///
818 /// Added as part of the iter-1 Codex review fix for Task 4
819 /// Steps 2-3: the `remove_file` integration tests need to assert
820 /// that a file's segment entry is cleared after removal, closing
821 /// the FileId-recycle stale-range bug documented at
822 /// `sqry-core/tests/incremental_remove_file_scale.rs`.
823 #[must_use]
824 pub fn file_segments(&self) -> &FileSegmentTable {
825 &self.file_segments
826 }
827
828 // Note (iter-1 review fix): a `pub fn edges(&self) -> &BidirectionalEdgeStore`
829 // accessor was removed. `BidirectionalEdgeStore` exposes
830 // `forward_mut(&self)` and `reverse_mut(&self)` (interior mutability),
831 // so returning `&BidirectionalEdgeStore` from `&self` would let
832 // external callers escalate a "read-only" handle into a writer. The
833 // rebuild contract requires that edge mutation only happens inside
834 // `RebuildGraph::remove_file` / `finalize` on `&mut self`. If a
835 // read-only edge-view accessor is ever needed by a legitimate
836 // consumer, add a wrapper struct (e.g., `BidirectionalEdgeStoreView`)
837 // that exposes only `forward()` / `reverse()` / `edges_from` /
838 // `edges_to` / `stats` on `&self` — never the `*_mut` methods.
839
840 /// Stage a NodeId for tombstoning during the next `finalize` pass.
841 ///
 /// Gate 0c ships this helper so finalize tests can drive the
 /// 14-step contract with a realistic tombstone set without going
 /// through the full `remove_file` path. Production callers route
 /// through [`remove_file`](Self::remove_file) (Task 4 Step 4), which
 /// drains the per-file bucket via `FileRegistry::take_nodes` and
 /// stages the result into this same set.
848 pub fn tombstone(&mut self, id: NodeId) {
849 self.tombstones.insert(id);
850 }
851
852 /// Stage every `NodeId` in `ids` for tombstoning during the next
853 /// `finalize` pass. Equivalent to calling [`tombstone`](Self::tombstone)
854 /// for each id, but expresses the bulk intent at the call site.
855 ///
856 /// Used by [`remove_file`](Self::remove_file) to fold every
857 /// file-local NodeId into the staged set in a single pass.
858 pub(crate) fn tombstone_many<I: IntoIterator<Item = NodeId>>(&mut self, ids: I) {
859 self.tombstones.extend(ids);
860 }
861
862 /// Drain every `NodeId` belonging to `file_id`, invalidate every
863 /// rebuild-local edge whose source or target is one of those nodes
864 /// (across both forward and reverse CSR + delta tiers of the
865 /// rebuild-local edge store), drop the file's entry from the
866 /// rebuild-local [`FileRegistry`], and return the list of
867 /// tombstoned [`NodeId`]s.
868 ///
869 /// This is the rebuild-side mirror of
870 /// [`super::super::concurrent::CodeGraph::remove_file`]. Whereas
871 /// the `CodeGraph` variant mutates a live publishable graph in
872 /// place (O(1) publish once the caller wraps it in an `Arc`), this
873 /// variant operates on the owned, pre-finalize
874 /// [`RebuildGraph`] state — so the edge-store tombstones land in
875 /// the CSR's tombstone bitmap and the delta buffer, and are later
876 /// physically purged by [`finalize`](Self::finalize) step 9 when
877 /// the CSR is rebuilt from the compacted delta (§H lines 684–691).
878 ///
879 /// The method does **not** rewrite `NodeIdBearing` surfaces on the
880 /// rebuild (NodeArena, AuxiliaryIndices, NodeMetadataStore,
881 /// NodeProvenanceStore, ScopeArena, AliasTable, ShadowTable) —
882 /// those surfaces are compacted once, uniformly, by `finalize()`
883 /// steps 2–7 against the accumulated `tombstones` set. Running a
884 /// partial compaction here would duplicate work and violate the
885 /// plan's "compact once at step N" contract.
886 ///
887 /// Instead, the tombstoned `NodeId`s are accumulated in
888 /// `self.tombstones` via [`tombstone_many`](Self::tombstone_many).
889 /// Successive `remove_file` calls against different files
890 /// accumulate into the same set; `finalize()` then sweeps every
891 /// K.A/K.B surface once with the union predicate.
892 ///
893 /// # What is tombstoned immediately
894 ///
 /// Only the following surfaces are mutated before `finalize()` runs:
896 ///
897 /// * **NodeArena**: each file-local `NodeId` is `remove`d so the
898 /// slot's generation advances and stale handles cannot alias a
899 /// re-allocation. Downstream compaction at step 2 is then a
900 /// no-op for these slots (idempotent; the arena skips stale
901 /// generations).
902 /// * **Edge store** (both forward + reverse, both CSR + delta):
903 /// [`BidirectionalEdgeStore::tombstone_edges_for_nodes`] kills
904 /// every edge whose source or target is one of the drained
905 /// NodeIds. CSR tombstones land in `csr_tombstones`; delta
906 /// edges are dropped outright. Step 9 of `finalize` then
907 /// rebuilds a fresh CSR from the tombstone-free delta, so the
908 /// CSR tombstones become physically invisible by publish time.
 /// * **FileRegistry**: the bucket is drained via
 ///   [`FileRegistry::take_nodes`] and the file entry is
 ///   deregistered via [`FileRegistry::unregister`]. Both are
 ///   idempotent on an already-removed file.
 /// * **FileSegmentTable**: the file's segment entry is cleared via
 ///   `FileSegmentTable::remove` so a recycled `FileId` cannot later
 ///   alias a stale node range (see the inline comment in the body).
913 ///
914 /// # Returned value
915 ///
916 /// The list of NodeIds that were staged for tombstoning. Empty on
917 /// an unknown or already-removed file. Useful for Gate 0c finalize
918 /// tests that need to assert on bucket-drain correctness or on the
919 /// union of staged tombstones across several `remove_file` calls.
920 ///
921 /// # Idempotency
922 ///
923 /// Calling twice with the same `file_id` is a no-op on the second
924 /// call. The bucket is already drained, `take_nodes` returns an
925 /// empty `Vec`, the rest of the method short-circuits, and the
926 /// `tombstones` set is unchanged.
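 ///
 /// # Example (sketch)
 ///
 /// Two removals folding into one finalize sweep; `file_a` / `file_b`
 /// are illustrative `FileId`s already registered on the rebuild, and
 /// the count arithmetic assumes the usual case where the two per-file
 /// buckets are disjoint:
 ///
 /// ```ignore
 /// let dropped_a = rebuild.remove_file(file_a);
 /// let dropped_b = rebuild.remove_file(file_b);
 /// assert_eq!(
 ///     rebuild.pending_tombstone_count(),
 ///     dropped_a.len() + dropped_b.len()
 /// );
 /// let published = rebuild.finalize()?; // single compaction sweep
 /// ```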
927 #[allow(dead_code)] // Intra-crate consumer is Task 4 Step 4
928 // (`incremental_rebuild`); external consumer is sqry-daemon's
929 // Task 6 `WorkspaceManager` via the feature-gated `pub use` of
930 // `RebuildGraph`. The visibility is `pub` (not `pub(crate)`) so
931 // the daemon can drive file removals through the rebuild plane,
932 // which is the canonical path for file-deletion events per §F.2.
933 pub fn remove_file(&mut self, file_id: super::super::file::FileId) -> Vec<NodeId> {
934 // Drain the rebuild-local FileRegistry bucket.
935 let tombstoned: Vec<NodeId> = self.files.take_nodes(file_id);
936 // Deregister the file entry unconditionally (idempotent).
937 self.files.unregister(file_id);
938 // Clear the rebuild-local `FileSegmentTable` entry for this
939 // file (idempotent — `remove` no-ops on unknown ids). This
940 // matches `CodeGraph::remove_file`'s behaviour: finalize()
941 // publishes `self.file_segments` verbatim at step 12, so any
942 // stale entry leaked here would survive into the assembled
943 // `CodeGraph`. Because `FileRegistry::unregister` recycles
944 // `FileId` slots, a leaked segment would later alias a
945 // different file's node range after the slot is reissued —
946 // which would cause `reindex_files` to tombstone the wrong
947 // range. Clearing the segment at remove time is the only
948 // defence that closes both the "finalize publishes stale
949 // segment" path AND the "FileId recycle attaches stale
950 // range" path.
951 self.file_segments.remove(file_id);
952
953 if tombstoned.is_empty() {
954 return tombstoned;
955 }
956
957 let dead: std::collections::HashSet<NodeId> = tombstoned.iter().copied().collect();
958
959 // 1. Tombstone each arena slot. `NodeArena::remove` is
960 // idempotent against stale generations, so retriggering a
961 // file removal after finalize is safe.
962 for &nid in &tombstoned {
963 let _ = self.nodes.remove(nid);
964 }
965
966 // 2. Invalidate edges across both CSR + delta in both
967 // directions on the rebuild-local edge store.
968 self.edges.tombstone_edges_for_nodes(&dead);
969
970 // 3. Stage the drained NodeIds for the finalize-time K.A/K.B
971 // compaction sweep. finalize() will consume these at steps
972 // 2–7 through the NodeIdBearing::retain_nodes predicate.
973 self.tombstone_many(tombstoned.iter().copied());
974
975 tombstoned
976 }
977
978 /// Returns the rebuild-local epoch captured at `clone_for_rebuild`.
979 #[must_use]
980 pub fn prior_epoch(&self) -> u64 {
981 self.epoch
982 }
983
984 /// Pre-finalize tombstone-residue check on the rebuild-state
985 /// structures themselves (plan §F.2 literal named API).
986 ///
987 /// Iterates every `NodeIdBearing` field on this `RebuildGraph` and
988 /// panics if any contains a NodeId present in `self.tombstones`.
989 /// This is a diagnostic helper for mid-rebuild consistency checks:
990 /// e.g., Task 4 Step 4's `RebuildGraph::remove_file` can call this
991 /// after tombstoning a file but before more closure files land, to
992 /// prove the just-tombstoned NodeIds really left every index before
993 /// the next pass commits. The `RebuildGraph::finalize` flow then
994 /// re-asserts against the drained set on the *assembled* `CodeGraph`
995 /// at step 14 — those are two different snapshots of the same
996 /// invariant; both must hold.
997 ///
 /// No-op when `self.tombstones` is empty; the method itself is
 /// compiled out of release builds, so call sites must be gated too.
999 ///
1000 /// Does **not** replace the step-14 call site; the single source of
1001 /// truth for the publish-boundary residue check remains
1002 /// `crate::graph::unified::publish::assert_publish_invariants`. This
1003 /// helper exists so pre-finalize call sites (Task 4 Step 4's
1004 /// post-remove sanity check, incremental-engine debug probes, Gate
1005 /// 0d negative tests) can name the assertion without rolling their
1006 /// own iteration.
1007 #[cfg(any(debug_assertions, test))]
1008 pub fn assert_no_tombstone_residue(&self) {
1009 use super::coverage::NodeIdBearing;
1010 let dead = &self.tombstones;
1011 if dead.is_empty() {
1012 return;
1013 }
1014 for nid in self.nodes.all_node_ids() {
1015 assert!(
1016 !dead.contains(&nid),
1017 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1018 );
1019 }
1020 for nid in self.edges.all_node_ids() {
1021 assert!(
1022 !dead.contains(&nid),
1023 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1024 );
1025 }
1026 for nid in self.indices.all_node_ids() {
1027 assert!(
1028 !dead.contains(&nid),
1029 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1030 );
1031 }
1032 for nid in self.macro_metadata.all_node_ids() {
1033 assert!(
1034 !dead.contains(&nid),
1035 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1036 );
1037 }
1038 for nid in self.node_provenance.all_node_ids() {
1039 assert!(
1040 !dead.contains(&nid),
1041 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1042 );
1043 }
1044 for nid in self.scope_arena.all_node_ids() {
1045 assert!(
1046 !dead.contains(&nid),
1047 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1048 );
1049 }
1050 for nid in self.alias_table.all_node_ids() {
1051 assert!(
1052 !dead.contains(&nid),
1053 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1054 );
1055 }
1056 for nid in self.shadow_table.all_node_ids() {
1057 assert!(
1058 !dead.contains(&nid),
1059 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1060 );
1061 }
1062 for nid in self.files.all_node_ids() {
1063 assert!(
1064 !dead.contains(&nid),
1065 "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1066 );
1067 }
1068 }
1069}
1070
1071// =====================================================================
1072// Step 1 helper: rewrite every live `NodeEntry`'s interned-string
1073// fields through the dedup-remap table produced by
1074// `StringInterner::build_dedup_table()`.
1075// =====================================================================
1076
1077/// Rewrite every `NodeEntry` field that holds a [`crate::graph::unified::string::id::StringId`]
1078/// through `remap` so no live node points at a collapsed-duplicate
1079/// slot. Called from finalize step 1 after the interner has been
1080/// canonicalised; a no-op when `remap` is empty (the common case of a
1081/// no-op rebuild).
1082///
1083/// Fields rewritten: `name`, `signature`, `doc`, `qualified_name`,
1084/// `visibility`. These are every `StringId` / `Option<StringId>` field
1085/// that `NodeEntry` currently declares. Adding a new `StringId`-valued
1086/// field to `NodeEntry` requires extending this helper — the closest
1087/// enforcement is the compile-fail coverage in the `sqry_graph_fields!`
1088/// macro which forces a new `RebuildGraph` field and a matching
1089/// finalize step for any new `CodeGraph` field; individual arena
1090/// fields are caught at review time against this helper.
1091fn rewrite_node_entries_through_remap(
1092 nodes: &mut NodeArena,
1093 remap: &HashMap<
1094 crate::graph::unified::string::id::StringId,
1095 crate::graph::unified::string::id::StringId,
1096 >,
1097) {
1098 if remap.is_empty() {
1099 return;
1100 }
1101 for (_id, entry) in nodes.iter_mut() {
1102 if let Some(&canon) = remap.get(&entry.name) {
1103 entry.name = canon;
1104 }
1105 if let Some(sid) = entry.signature
1106 && let Some(&canon) = remap.get(&sid)
1107 {
1108 entry.signature = Some(canon);
1109 }
1110 if let Some(sid) = entry.doc
1111 && let Some(&canon) = remap.get(&sid)
1112 {
1113 entry.doc = Some(canon);
1114 }
1115 if let Some(sid) = entry.qualified_name
1116 && let Some(&canon) = remap.get(&sid)
1117 {
1118 entry.qualified_name = Some(canon);
1119 }
1120 if let Some(sid) = entry.visibility
1121 && let Some(&canon) = remap.get(&sid)
1122 {
1123 entry.visibility = Some(canon);
1124 }
1125 }
1126}
1127
1128// =====================================================================
1129// Assembly path notes (A2 §H "Type-enforced publish path")
1130// =====================================================================
1131//
1132// The only route from `RebuildGraph` to `Arc<CodeGraph>` is:
1133//
1134// let code_graph = rebuild.finalize()?; // Step 12 assembles CodeGraph
1135// let arc = Arc::new(code_graph); // caller wraps (publish site)
1136//
1137// The assembly inside `finalize` uses
1138// `CodeGraph::__assemble_from_rebuild_parts_internal`, which is
1139// `pub(crate)` on `CodeGraph`. That means downstream crates — even
1140// `sqry-daemon` with `rebuild-internals` enabled — cannot call the
1141// assembler directly; they must route through `finalize()`, which
1142// guarantees the 14-step compaction sequence runs first.
1143//
1144// The trybuild fixture
1145// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1146// exercises this: a downstream crate attempting to call
1147// `__assemble_from_rebuild_parts_internal` fails with E0603 ("function
1148// is private"), and attempting to `impl From<RebuildGraph> for CodeGraph`
1149// from outside this module fails with E0117 (orphan-rule violation).
1150
1151// =====================================================================
1152// Tests
1153// =====================================================================
1154
1155#[cfg(test)]
1156mod tests {
1157 use super::*;
1158 use crate::graph::unified::file::FileId;
1159 use crate::graph::unified::node::NodeKind;
1160 use crate::graph::unified::storage::arena::NodeEntry;
1161 use crate::graph::unified::storage::metadata::MacroNodeMetadata;
1162
1163 /// Seed a `CodeGraph` with a handful of live nodes distributed over
1164 /// two files so the per-field compaction steps have something
1165 /// non-trivial to operate on.
1166 fn seeded_graph() -> (CodeGraph, NodeId, NodeId, NodeId) {
1167 let mut graph = CodeGraph::new();
1168 let file_a = FileId::new(1);
1169 let file_b = FileId::new(2);
1170 let sym = graph.strings_mut().intern("symbol_a").expect("intern a");
1171 let node_a;
1172 let node_b;
1173 let node_c;
1174 {
1175 let arena = graph.nodes_mut();
1176 node_a = arena
1177 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
1178 .expect("alloc a");
1179 node_b = arena
1180 .alloc(NodeEntry::new(NodeKind::Method, sym, file_a))
1181 .expect("alloc b");
1182 node_c = arena
1183 .alloc(NodeEntry::new(NodeKind::Struct, sym, file_b))
1184 .expect("alloc c");
1185 }
1186 graph
1187 .indices_mut()
1188 .add(node_a, NodeKind::Function, sym, Some(sym), file_a);
1189 graph
1190 .indices_mut()
1191 .add(node_b, NodeKind::Method, sym, Some(sym), file_a);
1192 graph
1193 .indices_mut()
1194 .add(node_c, NodeKind::Struct, sym, Some(sym), file_b);
1195 graph
1196 .macro_metadata_mut()
1197 .insert(node_a, MacroNodeMetadata::default());
1198 (graph, node_a, node_b, node_c)
1199 }
1200
1201 #[test]
1202 fn clone_for_rebuild_copies_every_field_without_arc_sharing() {
1203 let (graph, _a, _b, _c) = seeded_graph();
1204 let rebuild = graph.clone_for_rebuild();
1205
1206 // Field-level counts match the source graph.
1207 assert_eq!(rebuild.nodes.len(), graph.nodes().len());
1208 assert_eq!(rebuild.macro_metadata.len(), graph.macro_metadata().len());
1209 // The rebuild owns its own arena (not an Arc share), so
1210 // mutating it in place would not affect the source graph. We
1211 // verify this by tombstoning a node in the rebuild and
1212 // confirming the source is unchanged.
1213 let mut rebuild = rebuild;
1214 let ids: Vec<NodeId> = rebuild.nodes.all_node_ids().collect();
1215 let victim = *ids.first().expect("at least one node");
1216 rebuild.tombstone(victim);
1217 assert_eq!(rebuild.pending_tombstone_count(), 1);
1218 // Source graph unaffected.
1219 assert_eq!(graph.nodes().len(), ids.len());
1220 }
1221
1222 #[test]
1223 fn clone_for_rebuild_preserves_epoch_and_fact_epoch() {
1224 let (mut graph, _, _, _) = seeded_graph();
1225 graph.set_epoch(7);
1226 let rebuild = graph.clone_for_rebuild();
1227 assert_eq!(rebuild.prior_epoch(), 7);
1228 assert_eq!(rebuild.fact_epoch, graph.fact_epoch());
1229 }
1230
1231 // ---- Step-level finalize tests ---------------------------------
1232
1233 #[test]
1234 fn finalize_step11_bumps_epoch_by_one() {
1235 let (mut graph, _, _, _) = seeded_graph();
1236 graph.set_epoch(41);
1237 let rebuild = graph.clone_for_rebuild();
1238 let finalized = rebuild.finalize().expect("finalize ok");
1239 assert_eq!(finalized.epoch(), 42);
1240 }
1241
1242 #[test]
1243 fn finalize_step8_drains_tombstones_into_drained_set() {
 // `finalize()` consumes self, so `drained_tombstones` cannot be
 // inspected afterwards; instead we assert the observable effect of
 // the drain: the tombstoned node is absent from the assembled graph.
1247 let (graph, a, _b, _c) = seeded_graph();
1248 let mut rebuild = graph.clone_for_rebuild();
1249 rebuild.tombstone(a);
1250 assert_eq!(rebuild.pending_tombstone_count(), 1);
1251 // After finalize, the graph must not contain `a`.
1252 let finalized = rebuild.finalize().expect("finalize ok");
1253 assert!(
1254 finalized.nodes().get(a).is_none(),
1255 "tombstoned node must be gone from arena"
1256 );
1257 }
1258
1259 #[test]
1260 fn finalize_steps_2_and_4_compact_arena_and_indices_consistently() {
1261 let (graph, a, _b, c) = seeded_graph();
1262 let mut rebuild = graph.clone_for_rebuild();
1263 rebuild.tombstone(a);
1264 rebuild.tombstone(c);
1265 let finalized = rebuild.finalize().expect("finalize ok");
1266
1267 // Arena compaction (step 2): dropped nodes are gone.
1268 assert!(finalized.nodes().get(a).is_none());
1269 assert!(finalized.nodes().get(c).is_none());
1270 // Step 4 must compact the auxiliary indices in lockstep, so
1271 // the tombstoned ids do not linger in any index.
1274 assert!(!finalized.indices().by_kind(NodeKind::Function).contains(&a));
1275 assert!(!finalized.indices().by_kind(NodeKind::Struct).contains(&c));
1276 }
1277
1278 #[test]
1279 fn finalize_step5_compacts_macro_metadata() {
1280 let (graph, a, _b, _c) = seeded_graph();
1281 let mut rebuild = graph.clone_for_rebuild();
1282 rebuild.tombstone(a);
1283 let finalized = rebuild.finalize().expect("finalize ok");
1284 // `a` had macro metadata seeded; after finalize it must be gone.
1285 assert!(finalized.macro_metadata().get(a).is_none());
1286 }
1287
1288 #[test]
1289 fn finalize_step9_installs_rebuilt_csr() {
1290 // Step 9 (plan §H lines 684–691) must install a freshly built
1291 // CSR — not merely drop the stale cache. The assembled graph's
1292 // forward and reverse edge stores must both expose a `csr()`
1293 // after finalize, and the deltas must be empty (absorbed by
1294 // the swap).
1295 let (graph, _, _, _) = seeded_graph();
1296 let rebuild = graph.clone_for_rebuild();
1297 let finalized = rebuild.finalize().expect("finalize ok");
1298 assert!(
1299 finalized.edges().forward().csr().is_some(),
1300 "step 9 must install forward CSR"
1301 );
1302 assert!(
1303 finalized.edges().reverse().csr().is_some(),
1304 "step 9 must install reverse CSR"
1305 );
1306 // Deltas were absorbed by the swap.
1307 assert_eq!(finalized.edges().forward().delta_count(), 0);
1308 assert_eq!(finalized.edges().reverse().delta_count(), 0);
1309 }
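
    // A hedged companion sketch for step 9, using only the `add_edge`,
    // `csr()`, and `delta_count()` calls that the edge-remap tests
    // further down already rely on: an edge sitting in the delta tier
    // at clone time must be absorbed into the freshly built CSR, with
    // the delta drained to zero.
    #[test]
    fn finalize_step9_absorbs_pre_existing_delta_edges_into_csr() {
        use crate::graph::unified::edge::EdgeKind;

        let (mut graph, a, b, _c) = seeded_graph();
        graph.edges_mut().add_edge(
            a,
            b,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            FileId::new(1),
        );
        assert!(graph.edges().forward().delta_count() >= 1);

        let finalized = graph.clone_for_rebuild().finalize().expect("finalize ok");
        let forward = finalized.edges().forward();
        let csr = forward.csr().expect("forward CSR installed by step 9");
        let mut calls_seen = 0usize;
        for kind in csr.edge_kind() {
            if let EdgeKind::Calls { .. } = kind {
                calls_seen += 1;
            }
        }
        drop(forward);
        assert!(calls_seen >= 1, "delta Calls edge must land in the CSR");
        assert_eq!(finalized.edges().forward().delta_count(), 0);
    }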
1310
1311 #[test]
1312 fn finalize_step10_drops_confidence_for_removed_languages() {
1313 // Seed a graph with Rust + Python confidence; the rebuild-local
1314 // FileRegistry has zero files tagged (seeded_graph does not
1315 // register paths), so step 10 must drop both.
1316 let (mut graph, _, _, _) = seeded_graph();
1317 graph.merge_confidence(
1318 "rust",
1319 crate::confidence::ConfidenceMetadata {
1320 level: crate::confidence::ConfidenceLevel::Verified,
1321 ..Default::default()
1322 },
1323 );
1324 graph.merge_confidence(
1325 "python",
1326 crate::confidence::ConfidenceMetadata {
1327 level: crate::confidence::ConfidenceLevel::Partial,
1328 ..Default::default()
1329 },
1330 );
1331 assert_eq!(graph.confidence().len(), 2);
1332
1333 let rebuild = graph.clone_for_rebuild();
1334 let finalized = rebuild.finalize().expect("finalize ok");
1335 assert!(
1336 finalized.confidence().is_empty(),
1337 "step 10 must drop confidence entries for languages that \
1338 have no live files and no recorded limitations"
1339 );
1340 }
1341
1342 #[test]
1343 fn finalize_step10_preserves_confidence_with_limitations() {
1344 // A confidence entry that encodes deliberate limitations must
1345 // survive a zero-file rebuild: dropping it would silently lose
1346 // plugin-recorded analysis state.
1347 let (mut graph, _, _, _) = seeded_graph();
1348 graph.merge_confidence(
1349 "rust",
1350 crate::confidence::ConfidenceMetadata {
1351 level: crate::confidence::ConfidenceLevel::AstOnly,
1352 limitations: vec!["no rust-analyzer".to_string()],
1353 unavailable_features: vec!["type inference".to_string()],
1354 },
1355 );
1356 let rebuild = graph.clone_for_rebuild();
1357 let finalized = rebuild.finalize().expect("finalize ok");
1358 assert_eq!(finalized.confidence().len(), 1);
1359 assert!(finalized.confidence().contains_key("rust"));
1360 }
1361
1362 #[test]
1363 fn finalize_step1_canonicalises_interner_via_dedup() {
        // The freeze step must leave the interner's lookup table
        // consistent (not stale) after finalize. Duplicate-slot
        // collapse itself is exercised by the iter-2 remap tests
        // further down.
1366 let (graph, _, _, _) = seeded_graph();
1367 let rebuild = graph.clone_for_rebuild();
1368 let finalized = rebuild.finalize().expect("finalize ok");
1369 assert!(
1370 !finalized.strings().is_lookup_stale(),
1371 "step 1 freeze must leave lookup_stale == false"
1372 );
1373 }
1374
1375 #[test]
1376 fn finalize_is_infallible_on_empty_tombstone_set() {
1377 let (graph, _, _, _) = seeded_graph();
1378 let rebuild = graph.clone_for_rebuild();
1379 let result = rebuild.finalize();
1380 assert!(result.is_ok(), "empty-tombstone finalize must succeed");
1381 let finalized = result.unwrap();
1382 // Every node survives.
1383 assert_eq!(finalized.nodes().len(), 3);
1384 }
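
    // Counterpart sketch, grounded in the same guarantees the step-2,
    // step-4, and step-6 tests above already assert: tombstoning every
    // seeded node before finalize must also succeed and leave an empty
    // arena.
    #[test]
    fn finalize_succeeds_when_every_node_is_tombstoned() {
        let (graph, a, b, c) = seeded_graph();
        let mut rebuild = graph.clone_for_rebuild();
        rebuild.tombstone(a);
        rebuild.tombstone(b);
        rebuild.tombstone(c);
        let finalized = rebuild.finalize().expect("finalize ok");
        assert_eq!(finalized.nodes().len(), 0, "no node survives a full tombstone set");
    }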
1385
1386 #[test]
1387 fn finalize_survives_interner_snapshot_unchanged_when_no_edits() {
1388 let (graph, _, _, _) = seeded_graph();
1389 let prior_string_count = graph.strings().len();
1390 let rebuild = graph.clone_for_rebuild();
1391 let finalized = rebuild.finalize().expect("finalize ok");
1392 assert_eq!(
1393 finalized.strings().len(),
1394 prior_string_count,
1395 "freeze step must preserve string count across a no-op rebuild"
1396 );
1397 }
1398
1399 #[test]
1400 fn rebuild_graph_pending_tombstone_count_is_accurate() {
1401 let (graph, a, b, c) = seeded_graph();
1402 let mut rebuild = graph.clone_for_rebuild();
1403 assert_eq!(rebuild.pending_tombstone_count(), 0);
1404 rebuild.tombstone(a);
1405 rebuild.tombstone(b);
1406 rebuild.tombstone(c);
1407 // Inserting the same id again is idempotent.
1408 rebuild.tombstone(a);
1409 assert_eq!(rebuild.pending_tombstone_count(), 3);
1410 }
1411
1412 // ----------------------------------------------------------------
1413 // Iter-2 B1: StringId remap across every StringId-backed holder.
1414 // ----------------------------------------------------------------
1415
1416 /// Construct a RebuildGraph whose interner has intentional duplicate
1417 /// `StringId` slots (two slots containing the same canonical
1418 /// "symbol_dup" bytes), with the **live** state referencing both
1419 /// slots. `build_dedup_table()` must collapse them, step 1 must
1420 /// rewrite every StringId-backed surface through the remap, and
1421 /// step 4 must leave no bucket pointing at a recycled StringId.
1422 #[test]
1423 fn step1_remaps_auxiliary_indices_keys_through_dedup_table() {
1424 use crate::graph::unified::storage::interner::StringInterner;
1425
1426 let mut graph = CodeGraph::new();
1427 // Intern "alpha" twice via the bulk path so the two slots are
1428 // NOT coalesced at intern time. We emulate that by manually
1429 // using `alloc_range` + direct bulk_slices_mut assignment on
1430 // the interner.
1431 let interner: &mut StringInterner = graph.strings_mut();
1432 let start = interner.alloc_range(2).expect("alloc range");
1433 {
1434 let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1435 s_slots[0] = Some(std::sync::Arc::from("alpha"));
1436 s_slots[1] = Some(std::sync::Arc::from("alpha"));
1437 rc_slots[0] = 1;
1438 rc_slots[1] = 1;
1439 }
1440 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1441 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1442 let file = FileId::new(1);
1443
1444 // Allocate one node that references the duplicate StringId so
1445 // after dedup the arena still points at a canonical id.
1446 let mut entry = NodeEntry::new(NodeKind::Function, id_dup, file);
1447 entry.qualified_name = Some(id_dup);
1448 let node = graph.nodes_mut().alloc(entry).expect("alloc");
1449 graph
1450 .indices_mut()
1451 .add(node, NodeKind::Function, id_dup, Some(id_dup), file);
1452
1453 let rebuild = graph.clone_for_rebuild();
1454 let finalized = rebuild.finalize().expect("finalize ok");
1455
1456 // After step 1 the duplicate slot must be recycled and every
1457 // live surface must reference the canonical slot.
1458 // (a) Arena field references canonical
1459 let entry = finalized.nodes().get(node).expect("node survives");
1460 assert_eq!(entry.name, id_canon, "NodeEntry.name must be canonicalised");
1461 assert_eq!(
1462 entry.qualified_name,
1463 Some(id_canon),
1464 "NodeEntry.qualified_name must be canonicalised"
1465 );
1466 // (b) AuxiliaryIndices: by_name must find the node under canonical id.
1467 assert!(
1468 finalized.indices().by_name(id_canon).contains(&node),
1469 "indices.by_name under canonical StringId must contain the node"
1470 );
1471 // (c) And must NOT have any bucket under the duplicate StringId.
1472 assert!(
1473 finalized.indices().by_name(id_dup).is_empty(),
1474 "duplicate StringId bucket must be empty after remap"
1475 );
1476 assert!(
1477 finalized.indices().by_qualified_name(id_dup).is_empty(),
1478 "duplicate StringId qname bucket must be empty after remap"
1479 );
1480 // (d) Interner has no stale lookup.
1481 assert!(!finalized.strings().is_lookup_stale());
1482 }
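
    // A minimal sketch of why the bulk-slice setup above is needed,
    // assuming the regular `intern` path coalesces duplicates as the
    // comment in the previous test implies: interning the same bytes
    // twice through `intern` hands back the same StringId, so duplicate
    // slots can only be manufactured via `alloc_range` + `bulk_slices_mut`.
    #[test]
    fn intern_path_coalesces_duplicates_at_intern_time() {
        let mut graph = CodeGraph::new();
        let first = graph.strings_mut().intern("gamma").expect("intern once");
        let second = graph.strings_mut().intern("gamma").expect("intern twice");
        assert_eq!(first, second, "regular interning must coalesce duplicate bytes");
    }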
1483
1484 #[test]
1485 fn step1_merges_aux_indices_buckets_when_keys_collapse() {
1486 use crate::graph::unified::storage::interner::StringInterner;
1487
1488 let mut graph = CodeGraph::new();
1489 let interner: &mut StringInterner = graph.strings_mut();
1490 let start = interner.alloc_range(2).expect("alloc range");
1491 {
1492 let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1493 s_slots[0] = Some(std::sync::Arc::from("beta"));
1494 s_slots[1] = Some(std::sync::Arc::from("beta"));
1495 rc_slots[0] = 1;
1496 rc_slots[1] = 1;
1497 }
1498 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1499 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1500 let file = FileId::new(1);
1501
1502 // Two nodes: one references canonical, one references duplicate.
1503 // After remap, both must land in the canonical bucket of
1504 // `name_index` (merged, deduplicated by NodeId).
1505 let node_canon = graph
1506 .nodes_mut()
1507 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file))
1508 .expect("alloc canon");
1509 let node_dup = graph
1510 .nodes_mut()
1511 .alloc(NodeEntry::new(NodeKind::Function, id_dup, file))
1512 .expect("alloc dup");
1513 graph
1514 .indices_mut()
1515 .add(node_canon, NodeKind::Function, id_canon, None, file);
1516 graph
1517 .indices_mut()
1518 .add(node_dup, NodeKind::Function, id_dup, None, file);
1519
1520 let rebuild = graph.clone_for_rebuild();
1521 let finalized = rebuild.finalize().expect("finalize ok");
1522
1523 let canonical_bucket = finalized.indices().by_name(id_canon);
1524 assert!(canonical_bucket.contains(&node_canon));
1525 assert!(canonical_bucket.contains(&node_dup));
1526 // No duplicates within the merged bucket.
1527 let mut seen = std::collections::HashSet::new();
1528 for id in canonical_bucket {
1529 assert!(seen.insert(*id), "bucket must be dedup'd by NodeId");
1530 }
1531 assert!(
1532 finalized.indices().by_name(id_dup).is_empty(),
1533 "collapsed duplicate key bucket must be removed"
1534 );
1535 }
1536
1537 #[test]
1538 fn step1_remaps_file_registry_source_uri() {
1539 use crate::graph::unified::storage::interner::StringInterner;
1540
1541 let mut graph = CodeGraph::new();
1542 // Force two identical "file:///foo" StringId slots.
1543 let interner: &mut StringInterner = graph.strings_mut();
1544 let start = interner.alloc_range(2).expect("alloc range");
1545 {
1546 let (s, rc) = interner.bulk_slices_mut(start, 2);
1547 s[0] = Some(std::sync::Arc::from("file:///foo"));
1548 s[1] = Some(std::sync::Arc::from("file:///foo"));
1549 rc[0] = 1;
1550 rc[1] = 1;
1551 }
1552 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1553 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1554
1555 let fid = graph
1556 .files_mut()
1557 .register_external_with_uri(
1558 "/virtual/Foo.class",
1559 Some(crate::graph::node::Language::Java),
1560 Some(id_dup),
1561 )
1562 .expect("register external");
1563
1564 let rebuild = graph.clone_for_rebuild();
1565 let finalized = rebuild.finalize().expect("finalize ok");
1566
1567 let view = finalized
1568 .files()
1569 .file_provenance(fid)
1570 .expect("provenance present");
1571 assert_eq!(
1572 view.source_uri,
1573 Some(id_canon),
1574 "FileRegistry source_uri must be rewritten to canonical StringId"
1575 );
1576 }
1577
1578 // ----------------------------------------------------------------
1579 // Iter-4 B1: edge-store StringId remap across delta + CSR tiers.
1580 // ----------------------------------------------------------------
1581
1582 /// Construct a `RebuildGraph` whose interner has three slots
1583 /// containing the same canonical bytes ("symbol_edge_dup"), with
1584 /// two distinct `EdgeKind::Imports { alias }` edges pointing at
1585 /// different duplicate slots. Step 1 must rewrite both aliases
1586 /// through the dedup table so the post-step-9 CSR references only
1587 /// the canonical slot; the duplicate slots must be recycled by
1588 /// `recycle_unreferenced` (ref_count == 0) and the canonical slot
1589 /// must retain references.
1590 ///
1591 /// This test intentionally exercises both the delta-tier write path
1592 /// (`DeltaBuffer::iter_mut`) *during* step 1 (where the edges still
1593 /// live in the delta buffer because no compaction has been run)
1594 /// and the CSR-tier read path (where the edges land after step 9's
1595 /// `swap_csrs_and_clear_deltas`). The companion test
1596 /// `step1_remaps_edge_kind_string_ids_in_csr_tier` covers the
1597 /// symmetric case where edges start in the CSR tier at the time
1598 /// step 1 runs.
1599 #[test]
1600 fn step1_remaps_edge_kind_string_ids_through_dedup_table() {
1601 use crate::graph::unified::edge::EdgeKind;
1602 use crate::graph::unified::storage::interner::StringInterner;
1603
1604 let mut graph = CodeGraph::new();
1605 let file_a = FileId::new(1);
1606 // Force three slots carrying the same canonical bytes so
1607 // `build_dedup_table()` collapses slot[start+1] and slot[start+2]
1608 // onto slot[start].
1609 let interner: &mut StringInterner = graph.strings_mut();
1610 let start = interner.alloc_range(3).expect("alloc range");
1611 {
1612 let (s, rc) = interner.bulk_slices_mut(start, 3);
1613 s[0] = Some(std::sync::Arc::from("symbol_edge_dup"));
1614 s[1] = Some(std::sync::Arc::from("symbol_edge_dup"));
1615 s[2] = Some(std::sync::Arc::from("symbol_edge_dup"));
1616 rc[0] = 1;
1617 rc[1] = 1;
1618 rc[2] = 1;
1619 }
1620 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1621 let id_dup_1 = crate::graph::unified::string::id::StringId::new(start + 1);
1622 let id_dup_2 = crate::graph::unified::string::id::StringId::new(start + 2);
1623
1624 // Allocate two nodes so we can hang two edges off distinct
1625 // (src, tgt) pairs and verify both are remapped.
1626 let src;
1627 let tgt;
1628 {
1629 let arena = graph.nodes_mut();
1630 src = arena
1631 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1632 .expect("alloc src");
1633 tgt = arena
1634 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1635 .expect("alloc tgt");
1636 }
1637 graph
1638 .indices_mut()
1639 .add(src, NodeKind::Function, id_canon, None, file_a);
1640 graph
1641 .indices_mut()
1642 .add(tgt, NodeKind::Function, id_canon, None, file_a);
1643
1644 // Edge 1: Imports { alias = id_dup_1 } — forward src→tgt.
1645 graph.edges_mut().add_edge(
1646 src,
1647 tgt,
1648 EdgeKind::Imports {
1649 alias: Some(id_dup_1),
1650 is_wildcard: false,
1651 },
1652 file_a,
1653 );
1654 // Edge 2: Imports { alias = id_dup_2 } — forward tgt→src (so the
1655 // two edges are distinct keys in the store).
1656 graph.edges_mut().add_edge(
1657 tgt,
1658 src,
1659 EdgeKind::Imports {
1660 alias: Some(id_dup_2),
1661 is_wildcard: true,
1662 },
1663 file_a,
1664 );
1665
1666 // Pre-finalize sanity: the edges live in the delta tier.
1667 assert!(
1668 graph.edges().forward().delta_count() >= 2,
1669 "pre-finalize: forward delta must hold both Imports edges"
1670 );
1671 assert!(
1672 graph.edges().reverse().delta_count() >= 2,
1673 "pre-finalize: reverse delta must hold the mirror of both Imports edges"
1674 );
1675
1676 let rebuild = graph.clone_for_rebuild();
1677 let finalized = rebuild.finalize().expect("finalize ok");
1678
1679 // After finalize step 9, edges have been absorbed into the CSR
1680 // tier by `swap_csrs_and_clear_deltas`. Step 1's `EdgeKind`
1681 // rewrite ran on the delta *before* step 9 snapshotted it, so
1682 // the CSR we assert on is built from the already-remapped delta.
1683 //
1684 // Forward CSR: every `Imports` alias must be canonical.
1685 let forward = finalized.edges().forward();
1686 let fwd_csr = forward
1687 .csr()
1688 .expect("forward CSR must be populated after step 9");
1689 let mut fwd_imports_seen = 0usize;
1690 for kind in fwd_csr.edge_kind() {
1691 if let EdgeKind::Imports { alias, .. } = kind {
1692 fwd_imports_seen += 1;
1693 assert_ne!(
1694 *alias,
1695 Some(id_dup_1),
1696 "forward CSR Imports alias must not reference pre-dedup slot id_dup_1"
1697 );
1698 assert_ne!(
1699 *alias,
1700 Some(id_dup_2),
1701 "forward CSR Imports alias must not reference pre-dedup slot id_dup_2"
1702 );
1703 assert_eq!(
1704 *alias,
1705 Some(id_canon),
1706 "forward CSR Imports alias must be canonicalised"
1707 );
1708 }
1709 }
1710 drop(forward);
1711 assert!(
1712 fwd_imports_seen >= 2,
1713 "forward CSR must hold both Imports edges after finalize (saw {fwd_imports_seen})"
1714 );
1715
1716 // Reverse CSR: mirror edges must be canonicalised identically.
1717 let reverse = finalized.edges().reverse();
1718 let rev_csr = reverse
1719 .csr()
1720 .expect("reverse CSR must be populated after step 9");
1721 let mut rev_imports_seen = 0usize;
1722 for kind in rev_csr.edge_kind() {
1723 if let EdgeKind::Imports { alias, .. } = kind {
1724 rev_imports_seen += 1;
1725 assert_ne!(*alias, Some(id_dup_1));
1726 assert_ne!(*alias, Some(id_dup_2));
1727 assert_eq!(*alias, Some(id_canon));
1728 }
1729 }
1730 drop(reverse);
1731 assert!(
1732 rev_imports_seen >= 2,
1733 "reverse CSR must mirror the forward Imports edges (saw {rev_imports_seen})"
1734 );
1735
1736 // Step-1 (c) invariant: after `recycle_unreferenced`, the
1737 // duplicate slots must report `ref_count == 0`. If step 1's
1738 // edge-store remap had skipped `EdgeKind` payloads, the
1739 // duplicates' refcounts would stay positive (held by the edge
1740 // store) and recycle would not reclaim them.
1741 assert_eq!(
1742 finalized.strings().ref_count(id_dup_1),
1743 0,
1744 "duplicate slot id_dup_1 must be recycled (ref_count == 0) by step 1"
1745 );
1746 assert_eq!(
1747 finalized.strings().ref_count(id_dup_2),
1748 0,
1749 "duplicate slot id_dup_2 must be recycled (ref_count == 0) by step 1"
1750 );
1751 assert!(
1752 finalized.strings().ref_count(id_canon) > 0,
1753 "canonical slot must retain references from the two Imports edges (got {})",
1754 finalized.strings().ref_count(id_canon)
1755 );
1756 }
1757
1758 /// Same construction as above, but compact the edges into a CSR tier
1759 /// before calling finalize so the rewrite covers the steady-state
1760 /// `CsrGraph::edge_kind` path. This exercises the branch of
1761 /// `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
1762 /// that is otherwise dead in the tests above.
1763 #[test]
1764 fn step1_remaps_edge_kind_string_ids_in_csr_tier() {
1765 use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
1766 use crate::graph::unified::edge::EdgeKind;
1767 use crate::graph::unified::storage::interner::StringInterner;
1768
1769 let mut graph = CodeGraph::new();
1770 let file_a = FileId::new(1);
1771 let interner: &mut StringInterner = graph.strings_mut();
1772 let start = interner.alloc_range(2).expect("alloc range");
1773 {
1774 let (s, rc) = interner.bulk_slices_mut(start, 2);
1775 s[0] = Some(std::sync::Arc::from("csr_alias_dup"));
1776 s[1] = Some(std::sync::Arc::from("csr_alias_dup"));
1777 rc[0] = 1;
1778 rc[1] = 1;
1779 }
1780 let id_canon = crate::graph::unified::string::id::StringId::new(start);
1781 let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1782
1783 let src;
1784 let tgt;
1785 {
1786 let arena = graph.nodes_mut();
1787 src = arena
1788 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1789 .expect("alloc src");
1790 tgt = arena
1791 .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1792 .expect("alloc tgt");
1793 }
1794 graph
1795 .indices_mut()
1796 .add(src, NodeKind::Function, id_canon, None, file_a);
1797 graph
1798 .indices_mut()
1799 .add(tgt, NodeKind::Function, id_canon, None, file_a);
1800
1801 // Push the Imports edge into delta with alias pointing at the
1802 // duplicate StringId slot.
1803 graph.edges_mut().add_edge(
1804 src,
1805 tgt,
1806 EdgeKind::Imports {
1807 alias: Some(id_dup),
1808 is_wildcard: false,
1809 },
1810 file_a,
1811 );
1812
1813 // Compact delta → CSR on both directions. This mirrors what the
1814 // full-build pipeline does after Phase 4d plus an explicit
1815 // compaction step.
1816 let node_count = 2;
1817 let edges = graph.edges_mut();
1818 let fwd_snap = snapshot_edges(&edges.forward(), node_count);
1819 let (forward_csr, _) =
1820 build_compacted_csr(&fwd_snap, Direction::Forward).expect("forward csr");
1821 let rev_snap = snapshot_edges(&edges.reverse(), node_count);
1822 let (reverse_csr, _) =
1823 build_compacted_csr(&rev_snap, Direction::Reverse).expect("reverse csr");
1824 edges.swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
1825 assert!(
1826 edges.forward().csr().is_some(),
1827 "forward CSR must be present"
1828 );
1829 assert!(
1830 edges.reverse().csr().is_some(),
1831 "reverse CSR must be present"
1832 );
        assert_eq!(edges.forward().delta_count(), 0, "forward delta must be empty");
        assert_eq!(edges.reverse().delta_count(), 0, "reverse delta must be empty");
1835
1836 let rebuild = graph.clone_for_rebuild();
1837 let finalized = rebuild.finalize().expect("finalize ok");
1838
1839 // CSR tier: every `edge_kind` entry must reference the canonical
1840 // StringId, never the duplicate.
1841 let forward = finalized.edges().forward();
1842 let csr = forward
1843 .csr()
1844 .expect("forward CSR must survive finalize step 1");
1845 let mut imports_seen = 0usize;
1846 for kind in csr.edge_kind() {
1847 if let EdgeKind::Imports { alias, .. } = kind {
1848 imports_seen += 1;
1849 assert_ne!(
1850 *alias,
1851 Some(id_dup),
1852 "CSR Imports alias must not reference pre-dedup duplicate"
1853 );
1854 assert_eq!(
1855 *alias,
1856 Some(id_canon),
1857 "CSR Imports alias must be canonicalised"
1858 );
1859 }
1860 }
1861 assert!(
1862 imports_seen > 0,
1863 "forward CSR must hold at least one Imports edge"
1864 );
1865 drop(forward);
1866
1867 // Reverse CSR — same guarantee.
1868 let reverse = finalized.edges().reverse();
1869 let rev_csr = reverse
1870 .csr()
1871 .expect("reverse CSR must survive finalize step 1");
1872 let mut rev_imports_seen = 0usize;
1873 for kind in rev_csr.edge_kind() {
1874 if let EdgeKind::Imports { alias, .. } = kind {
1875 rev_imports_seen += 1;
1876 assert_ne!(*alias, Some(id_dup));
1877 assert_eq!(*alias, Some(id_canon));
1878 }
1879 }
1880 assert!(rev_imports_seen > 0, "reverse CSR must hold Imports edges");
1881 drop(reverse);
1882
1883 assert_eq!(
1884 finalized.strings().ref_count(id_dup),
1885 0,
1886 "duplicate slot must be recycled (ref_count == 0) by step 1"
1887 );
1888 }
1889
1890 /// Exhaustive coverage: every `StringId`-holding surface *reachable
1891 /// through `CodeGraph`'s public mutation API* must be rewritten by
1892 /// finalize step 1. This test drives each surface with a distinct
1893 /// duplicate `StringId` slot and verifies after finalize that (i)
1894 /// every surface references the canonical slot, never the duplicate;
1895 /// (ii) every duplicate slot is recycled (`ref_count == 0`); (iii)
1896 /// the canonical slot retains a positive ref count.
1897 ///
1898 /// Surfaces exercised:
1899 /// * **Arena** (`NodeEntry.name`, `NodeEntry.qualified_name`)
1900 /// * **AuxiliaryIndices** (`name_index` / `qualified_name_index` keys)
1901 /// * **FileRegistry** (`FileEntry.source_uri`)
1902 /// * **BidirectionalEdgeStore** (`EdgeKind::Imports.alias` — delta tier)
1903 ///
1904 /// `AliasTable` and `ShadowTable` StringId fields live under private
1905 /// API (`pub(crate) fn set_alias_table`, no public entry mutators)
1906 /// and are already covered by the iter-2 `step1_remaps_*` tests above
1907 /// that exercise their `rewrite_string_ids_through_remap` helpers
1908 /// end-to-end through the binding-plane derivation path. Covering
1909 /// them here would require either exposing additional test-only APIs
1910 /// or duplicating the iter-2 coverage; neither adds assurance beyond
1911 /// what the dedicated iter-2 tests already provide.
1912 #[test]
1913 fn step1_remaps_all_stringid_holders_exhaustively() {
1914 use crate::graph::unified::edge::EdgeKind;
1915 use crate::graph::unified::storage::interner::StringInterner;
1916
1917 let mut graph = CodeGraph::new();
1918 let file_a = FileId::new(1);
1919 let interner: &mut StringInterner = graph.strings_mut();
1920 // 10 slots = 5 surfaces × 2 (canon + dup) interleaved.
1921 let start = interner.alloc_range(10).expect("alloc range");
1922 {
1923 let (s, rc) = interner.bulk_slices_mut(start, 10);
1924 for (i, label) in [
1925 "arena_name",
1926 "arena_name",
1927 "arena_qname",
1928 "arena_qname",
1929 "idx_key",
1930 "idx_key",
1931 "edge_alias",
1932 "edge_alias",
1933 "file_uri",
1934 "file_uri",
1935 ]
1936 .iter()
1937 .enumerate()
1938 {
1939 s[i] = Some(std::sync::Arc::from(*label));
1940 rc[i] = 1;
1941 }
1942 }
1943 let sid = |off: u32| crate::graph::unified::string::id::StringId::new(start + off);
1944 let arena_name_canon = sid(0);
1945 let arena_name_dup = sid(1);
1946 let arena_qname_canon = sid(2);
1947 let arena_qname_dup = sid(3);
1948 let idx_key_canon = sid(4);
1949 let idx_key_dup = sid(5);
1950 let edge_alias_canon = sid(6);
1951 let edge_alias_dup = sid(7);
1952 let file_uri_canon = sid(8);
1953 let file_uri_dup = sid(9);
1954
1955 // (a) Arena — name + qualified_name reference duplicate slots.
1956 let node;
1957 let node2;
1958 {
1959 let arena = graph.nodes_mut();
1960 let mut entry = NodeEntry::new(NodeKind::Function, arena_name_dup, file_a);
1961 entry.qualified_name = Some(arena_qname_dup);
1962 node = arena.alloc(entry).expect("alloc arena node");
1963 node2 = arena
1964 .alloc(NodeEntry::new(NodeKind::Function, arena_name_dup, file_a))
1965 .expect("alloc arena node2");
1966 }
1967 // (b) Indices — key under the duplicate StringId.
1968 graph
1969 .indices_mut()
1970 .add(node, NodeKind::Function, idx_key_dup, None, file_a);
1971 graph
1972 .indices_mut()
1973 .add(node2, NodeKind::Function, idx_key_dup, None, file_a);
1974 // (c) File registry — register a file with source_uri = dup.
1975 let fid = graph
1976 .files_mut()
1977 .register_external_with_uri(
1978 "/virtual/Exhaustive.class",
1979 Some(crate::graph::node::Language::Java),
1980 Some(file_uri_dup),
1981 )
1982 .expect("register external");
1983 // (d) Edge store — Imports edge with alias = dup.
1984 graph.edges_mut().add_edge(
1985 node,
1986 node2,
1987 EdgeKind::Imports {
1988 alias: Some(edge_alias_dup),
1989 is_wildcard: false,
1990 },
1991 file_a,
1992 );
1993
1994 let rebuild = graph.clone_for_rebuild();
1995 let finalized = rebuild.finalize().expect("finalize ok");
1996
1997 // (a) Arena: name + qualified_name on every live node.
1998 for nid in [node, node2] {
1999 let entry = finalized.nodes().get(nid).expect("node survives");
2000 assert_eq!(entry.name, arena_name_canon, "arena name canonicalised");
2001 if let Some(q) = entry.qualified_name {
2002 assert_eq!(q, arena_qname_canon, "arena qname canonicalised");
2003 }
2004 }
2005 // (b) Indices: canonical bucket populated, duplicate bucket empty.
2006 assert!(finalized.indices().by_name(idx_key_dup).is_empty());
2007 assert!(!finalized.indices().by_name(idx_key_canon).is_empty());
2008 // (c) File registry.
2009 let view = finalized
2010 .files()
2011 .file_provenance(fid)
2012 .expect("provenance present");
2013 assert_eq!(view.source_uri, Some(file_uri_canon));
2014 // (d) Edge store: every Imports alias canonical on both
2015 // directions. Post-finalize, edges live in CSR (step 9 drained
2016 // the delta into the CSR on both directions). Step 1's remap
2017 // ran on the delta *before* step 9, so the CSR is built from
2018 // the already-remapped delta.
2019 let fwd = finalized.edges().forward();
2020 let fwd_csr = fwd
2021 .csr()
2022 .expect("forward CSR must be populated after step 9");
2023 let mut fwd_imports = 0usize;
2024 for kind in fwd_csr.edge_kind() {
2025 if let EdgeKind::Imports { alias, .. } = kind {
2026 fwd_imports += 1;
2027 assert_ne!(*alias, Some(edge_alias_dup));
2028 assert_eq!(*alias, Some(edge_alias_canon));
2029 }
2030 }
2031 drop(fwd);
2032 assert!(fwd_imports > 0, "forward Imports must be present in CSR");
2033 let rev = finalized.edges().reverse();
2034 let rev_csr = rev
2035 .csr()
2036 .expect("reverse CSR must be populated after step 9");
2037 let mut rev_imports = 0usize;
2038 for kind in rev_csr.edge_kind() {
2039 if let EdgeKind::Imports { alias, .. } = kind {
2040 rev_imports += 1;
2041 assert_ne!(*alias, Some(edge_alias_dup));
2042 assert_eq!(*alias, Some(edge_alias_canon));
2043 }
2044 }
2045 drop(rev);
2046 assert!(rev_imports > 0, "reverse Imports must be present in CSR");
2047
2048 // All duplicate slots recycled; canonical slots retained.
2049 for (dup, canon, label) in [
2050 (arena_name_dup, arena_name_canon, "arena_name"),
2051 (arena_qname_dup, arena_qname_canon, "arena_qname"),
2052 (idx_key_dup, idx_key_canon, "idx_key"),
2053 (edge_alias_dup, edge_alias_canon, "edge_alias"),
2054 (file_uri_dup, file_uri_canon, "file_uri"),
2055 ] {
2056 assert_eq!(
2057 finalized.strings().ref_count(dup),
2058 0,
2059 "{label}: duplicate slot must be recycled (ref_count == 0)"
2060 );
2061 assert!(
2062 finalized.strings().ref_count(canon) > 0,
2063 "{label}: canonical slot must retain references (got {})",
2064 finalized.strings().ref_count(canon)
2065 );
2066 }
2067 }
2068
2069 // ----------------------------------------------------------------
2070 // Iter-2 B2: step 6 + step 13 per_file_nodes / bucket bijection.
2071 // ----------------------------------------------------------------
2072
2073 #[test]
2074 fn finalize_step6_drops_tombstoned_nodes_from_buckets() {
        let (mut graph, a, b, c) = seeded_graph();
        // Seed the registry buckets so finalize step 6 has real work:
        // `seeded_graph` does not record per-file nodes itself, so
        // populate the buckets on the graph before cloning so the
        // rebuild captures them.
        let file_a = FileId::new(1);
        let file_b = FileId::new(2);
2089 graph.files_mut().record_node(file_a, a);
2090 graph.files_mut().record_node(file_a, b);
2091 graph.files_mut().record_node(file_b, c);
2092
2093 let mut rebuild = graph.clone_for_rebuild();
2094 rebuild.tombstone(a);
2095 rebuild.tombstone(c);
2096 let finalized = rebuild.finalize().expect("finalize ok");
2097
2098 // a and c were tombstoned — must be absent from any bucket.
2099 // b survives under file_a; file_b's bucket collapsed to empty
2100 // and must be dropped.
2101 let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2102 finalized.files().per_file_nodes_for_gate0d().collect();
2103 assert_eq!(buckets.len(), 1, "empty bucket must be dropped");
2104 assert_eq!(buckets.get(&file_a).cloned().unwrap_or_default(), vec![b]);
2105 assert!(!buckets.contains_key(&file_b));
2106 }
2107
2108 #[test]
2109 fn finalize_step6_dedups_within_bucket() {
2110 let (mut graph, a, b, c) = seeded_graph();
2111 let file_a = FileId::new(1);
2112 let file_b = FileId::new(2);
2113 // Bucket every live node consistently with seeded_graph's own
2114 // FileId assignment (so the non-vacuous bijection check passes),
2115 // but duplicate `a` once to exercise step 6's dedup path.
2116 graph.files_mut().record_node(file_a, a);
2117 graph.files_mut().record_node(file_a, a); // intentional duplicate
2118 graph.files_mut().record_node(file_a, b);
2119 graph.files_mut().record_node(file_b, c);
2120 let rebuild = graph.clone_for_rebuild();
2121 let finalized = rebuild.finalize().expect("finalize ok");
2122 let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2123 finalized.files().per_file_nodes_for_gate0d().collect();
2124 let bucket_a = buckets.get(&file_a).expect("bucket for file_a");
2125 // Two live nodes (a, b) in file_a, duplicate `a` collapsed.
2126 assert_eq!(
2127 bucket_a.len(),
2128 2,
2129 "duplicates within bucket must be dedup'd"
2130 );
2131 assert!(bucket_a.contains(&a));
2132 assert!(bucket_a.contains(&b));
2133 }
2134
2135 #[test]
2136 fn finalize_step6_drops_empty_buckets() {
2137 let (mut graph, a, _b, _c) = seeded_graph();
2138 let file = FileId::new(1);
2139 graph.files_mut().record_node(file, a);
2140 let mut rebuild = graph.clone_for_rebuild();
2141 rebuild.tombstone(a); // drops the only bucket member
2142 let finalized = rebuild.finalize().expect("finalize ok");
2143 assert_eq!(
2144 finalized.files().per_file_bucket_count(),
2145 0,
2146 "empty bucket must be dropped by step 6"
2147 );
2148 }
2149
2150 #[test]
2151 fn bucket_bijection_passes_when_every_live_node_is_bucketed() {
2152 let (mut graph, a, b, c) = seeded_graph();
2153 // Bucket every live node consistently with each node's FileId.
2154 // seeded_graph uses file_a=1 for a, b and file_b=2 for c.
2155 let file_a = FileId::new(1);
2156 let file_b = FileId::new(2);
2157 graph.files_mut().record_node(file_a, a);
2158 graph.files_mut().record_node(file_a, b);
2159 graph.files_mut().record_node(file_b, c);
2160
2161 // Round-trip through finalize so the bijection check runs.
2162 let rebuild = graph.clone_for_rebuild();
2163 let finalized = rebuild.finalize().expect("finalize ok");
2164 // Explicit assert_bucket_bijection call also passes.
2165 finalized.assert_bucket_bijection();
2166 }
2167
2168 #[test]
2169 #[should_panic(expected = "duplicate node")]
2170 fn bucket_bijection_detects_duplicate_within_bucket() {
2171 let (mut graph, a, _b, _c) = seeded_graph();
2172 let file_a = FileId::new(1);
2173 // Manually force a duplicate inside a bucket by bypassing
2174 // `retain_nodes_in_buckets`. We achieve this by calling
2175 // `record_node` twice on the live graph and then invoking the
2176 // bijection check directly *without* routing through finalize
2177 // (which would dedup in step 6).
2178 graph.files_mut().record_node(file_a, a);
2179 graph.files_mut().record_node(file_a, a);
2180 graph.assert_bucket_bijection();
2181 }
2182
2183 #[test]
2184 #[should_panic(expected = "misfiled")]
2185 fn bucket_bijection_detects_misfiled_node() {
2186 let (mut graph, a, _b, _c) = seeded_graph();
2187 // Node `a` was allocated against FileId(1); put it in a bucket
2188 // keyed by FileId(99) — the bijection must reject with the
2189 // precise "misfiled" panic message emitted by
2190 // `CodeGraph::assert_bucket_bijection` (concurrent/graph.rs
2191 // around line 865). A broader `expected = "node"` would also
2192 // match the duplicate-node / dead-node / multi-bucket arms, so
2193 // it would not discriminate the specific failure mode under test.
2194 graph.files_mut().record_node(FileId::new(99), a);
2195 graph.assert_bucket_bijection();
2196 }
2197
2198 #[test]
2199 #[should_panic(expected = "absent from all buckets")]
2200 fn bucket_bijection_detects_missing_live_node() {
        let (mut graph, a, _b, _c) = seeded_graph();
2202 // Bucket one node but not the other; with at least one populated
2203 // bucket, condition (d) becomes strict and the missing live node
2204 // must trigger a panic.
2205 let file_a = FileId::new(1);
2207 graph.files_mut().record_node(file_a, a);
2208 graph.assert_bucket_bijection();
2209 }
2210
2211 #[test]
2212 #[should_panic(expected = "dead node")]
2213 fn bucket_bijection_detects_dead_node_in_bucket() {
2214 let (mut graph, a, _b, _c) = seeded_graph();
2215 // Tombstone a, then leave it in a bucket — bijection must reject.
2216 let file_a = FileId::new(1);
2217 graph.files_mut().record_node(file_a, a);
2218 graph.nodes_mut().remove(a);
2219 graph.assert_bucket_bijection();
2220 }
2221
2222 // -----------------------------------------------------------------
2223 // Gate 0d — Tombstone-residue negative tests.
2224 //
2225 // Covers both the `RebuildGraph::assert_no_tombstone_residue`
2226 // diagnostic helper and the finalize step-14 publish-boundary
2227 // check. The bijection counterparts live in the block immediately
2228 // above.
2229 // -----------------------------------------------------------------
2230
2231 #[test]
2232 #[should_panic(expected = "still in edge store")]
2233 fn rebuild_graph_residue_detects_tombstone_still_in_edge_store() {
2234 // Gate 0d iter-1 Minor — dedicated negative test for the
2235 // edge-store residue arm.
2236 //
2237 // The residue check iterates K-rows in order (see
2238 // `assert_no_tombstone_residue` above): NodeArena → edges →
2239 // AuxiliaryIndices → macro_metadata → ... — so to exercise the
2240 // `edges` arm specifically we must remove `a` from the
2241 // NodeArena first (so the NodeArena arm does not fire), leave
2242 // the edge that references `a` intact, and tombstone it.
        use crate::graph::unified::edge::EdgeKind;
2244 let (graph, a, b, _c) = seeded_graph();
2245 let mut rebuild = graph.clone_for_rebuild();
2246 // Seed the edge store with an edge that references `a`. This
2247 // becomes the "dangling reference" the residue check must
2248 // detect: `a` will be removed from the arena but the edge
2249 // keeps pointing at it.
2250 rebuild.edges.add_edge(
2251 a,
2252 b,
2253 EdgeKind::Calls {
2254 argument_count: 0,
2255 is_async: false,
2256 },
2257 FileId::new(1),
2258 );
2259 // Remove `a` from the arena so the K.A1 arm passes.
2260 rebuild.nodes.remove(a);
2261 rebuild.tombstone(a);
2262 rebuild.assert_no_tombstone_residue();
2263 }
2264
2265 #[test]
2266 #[should_panic(expected = "still in auxiliary indices")]
2267 fn rebuild_graph_residue_detects_tombstone_still_in_auxiliary_indices() {
2268 // The residue check iterates K-rows starting at `NodeArena`, so
2269 // to exercise the `AuxiliaryIndices` arm specifically we must
2270 // first remove `a` from the arena (simulating step 2 of
2271 // finalize), leaving the `AuxiliaryIndices` stale reference
2272 // as the first arm that can trip. This reproduces the exact
2273 // failure mode the pre-finalize helper exists to catch: a bug
2274 // where finalize compacts the arena but forgets one index.
2275 let (graph, a, _b, _c) = seeded_graph();
2276 let mut rebuild = graph.clone_for_rebuild();
2277 rebuild.nodes.remove(a);
2278 rebuild.tombstone(a);
2279 rebuild.assert_no_tombstone_residue();
2280 }
2281
2282 #[test]
2283 #[should_panic(expected = "still in NodeArena")]
2284 fn rebuild_graph_residue_detects_tombstone_still_in_node_arena() {
2285 // NodeArena is the first `NodeIdBearing` in the residue check's
2286 // iteration order, so we can trip it with a raw `tombstone(a)`
2287 // call before we touch any auxiliary index. The live arena
2288 // entry for `a` remains — that is the bug we want to surface.
2289 let (graph, a, _b, _c) = seeded_graph();
2290 let mut rebuild = graph.clone_for_rebuild();
2291 // Prove the arena still contains `a` before the assertion so
2292 // the panic expectation maps to a real condition, not a vacuous
2293 // empty-tombstone skip.
2294 assert!(
2295 rebuild.nodes.get(a).is_some(),
2296 "pre-condition: arena must still contain the staged tombstone"
2297 );
2298 rebuild.tombstone(a);
2299 rebuild.assert_no_tombstone_residue();
2300 }
2301
2302 #[test]
2303 fn rebuild_graph_residue_is_noop_on_empty_tombstone_set() {
2304 // Positive: with no tombstones staged, the assertion must be a
2305 // no-op — otherwise every fresh `clone_for_rebuild` would panic.
2306 let (graph, _a, _b, _c) = seeded_graph();
2307 let rebuild = graph.clone_for_rebuild();
2308 assert_eq!(rebuild.pending_tombstone_count(), 0);
2309 rebuild.assert_no_tombstone_residue();
2310 }
2311
2312 #[test]
2313 #[should_panic(expected = "still in NodeArena")]
2314 fn finalize_step14_residue_detects_live_reference_to_drained_node() {
2315 // Drive the `finalize` step-14 residue assertion through the
2316 // publish-boundary helper. `seeded_graph()` returns a graph in
2317 // which `a` is a live NodeArena entry; passing `a` in the
2318 // `drained` set without actually removing the arena slot
2319 // simulates the bug step 14 exists to catch — step 8 drained
2320 // `a` into `drained_tombstones`, but something failed to
2321 // compact the arena. The residue check's K-row iteration
2322 // starts at `NodeArena`, so that is the arm that fires.
2323 //
2324 // The test routes through the canonical
2325 // `publish::assert_publish_invariants` helper so the test
2326 // exercises the exact code path `finalize` step 14 executes,
2327 // not just the underlying `assert_no_tombstone_residue_for`
2328 // entry point.
2329 let (graph, a, _b, _c) = seeded_graph();
2330 let mut drained: ::std::collections::HashSet<NodeId> = ::std::collections::HashSet::new();
2331 drained.insert(a);
2332 crate::graph::unified::publish::assert_publish_invariants(&graph, &drained);
2333 }
2334
2335 #[test]
2336 fn finalize_with_empty_drained_set_passes_publish_invariants() {
2337 // Positive smoke test: the happy path through finalize (no
2338 // tombstones staged) must pass `assert_publish_invariants`
2339 // unconditionally on every build profile. This is the case the
2340 // full-rebuild `build_unified_graph_inner` end-of-function call
2341 // exercises on every CI run.
        let (mut graph, a, b, c) = seeded_graph();
        let file_a = FileId::new(1);
        let file_b = FileId::new(2);
        graph.files_mut().record_node(file_a, a);
        graph.files_mut().record_node(file_a, b);
        graph.files_mut().record_node(file_b, c);
2349
2350 let rebuild = graph.clone_for_rebuild();
2351 let finalized = rebuild.finalize().expect("finalize ok");
2352 crate::graph::unified::publish::assert_publish_invariants(
2353 &finalized,
2354 &::std::collections::HashSet::new(),
2355 );
2356 }
2357
2358 // ---- Task 4 Step 3 — RebuildGraph::remove_file ------------------
2359
2360 /// Seed a graph + rebuild with 2 files × `per_file` nodes and a
2361 /// mix of intra- and cross-file `Calls` edges, then clone for
2362 /// rebuild. Returns `(rebuild, file_a, file_b, file_a_nodes,
2363 /// file_b_nodes)` — mirrors `seed_two_file_graph` in the
2364 /// `concurrent::graph::tests` module but yields the rebuild value
2365 /// so tests here can drive `RebuildGraph::remove_file` directly.
2366 fn seed_two_file_rebuild(
2367 per_file: usize,
2368 ) -> (
2369 RebuildGraph,
2370 crate::graph::unified::file::FileId,
2371 crate::graph::unified::file::FileId,
2372 Vec<NodeId>,
2373 Vec<NodeId>,
2374 ) {
2375 use crate::graph::unified::edge::EdgeKind;
2376 use crate::graph::unified::node::NodeKind;
2377 use crate::graph::unified::storage::arena::NodeEntry;
2378 use std::path::Path;
2379
2380 let mut graph = CodeGraph::new();
2381 let sym = graph.strings_mut().intern("sym").expect("intern");
2382 let file_a = graph
2383 .files_mut()
2384 .register(Path::new("/tmp/rebuild_remove_file_test/a.rs"))
2385 .expect("register a");
2386 let file_b = graph
2387 .files_mut()
2388 .register(Path::new("/tmp/rebuild_remove_file_test/b.rs"))
2389 .expect("register b");
2390
2391 let mut file_a_nodes = Vec::with_capacity(per_file);
2392 let mut file_b_nodes = Vec::with_capacity(per_file);
2393 for _ in 0..per_file {
2394 let n = graph
2395 .nodes_mut()
2396 .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
2397 .expect("alloc a");
2398 file_a_nodes.push(n);
2399 graph.files_mut().record_node(file_a, n);
2400 graph
2401 .indices_mut()
2402 .add(n, NodeKind::Function, sym, None, file_a);
2403 }
2404 for _ in 0..per_file {
2405 let n = graph
2406 .nodes_mut()
2407 .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
2408 .expect("alloc b");
2409 file_b_nodes.push(n);
2410 graph.files_mut().record_node(file_b, n);
2411 graph
2412 .indices_mut()
2413 .add(n, NodeKind::Function, sym, None, file_b);
2414 }
2415 for i in 0..per_file.saturating_sub(1) {
2416 graph.edges_mut().add_edge(
2417 file_a_nodes[i],
2418 file_a_nodes[i + 1],
2419 EdgeKind::Calls {
2420 argument_count: 0,
2421 is_async: false,
2422 },
2423 file_a,
2424 );
2425 graph.edges_mut().add_edge(
2426 file_b_nodes[i],
2427 file_b_nodes[i + 1],
2428 EdgeKind::Calls {
2429 argument_count: 0,
2430 is_async: false,
2431 },
2432 file_b,
2433 );
2434 }
2435 graph.edges_mut().add_edge(
2436 file_a_nodes[0],
2437 file_b_nodes[0],
2438 EdgeKind::Calls {
2439 argument_count: 0,
2440 is_async: false,
2441 },
2442 file_a,
2443 );
2444 graph.edges_mut().add_edge(
2445 file_b_nodes[0],
2446 file_a_nodes[0],
2447 EdgeKind::Calls {
2448 argument_count: 0,
2449 is_async: false,
2450 },
2451 file_b,
2452 );
2453
2454 let rebuild = graph.clone_for_rebuild();
2455 (rebuild, file_a, file_b, file_a_nodes, file_b_nodes)
2456 }
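
    // A small shape-check sketch relying only on counts the helper's own
    // comments and the tests below already state: with `per_file = 3`
    // the rebuild carries 6 nodes and 6 forward delta edges
    // (2 intra-A + 2 intra-B + 2 cross).
    #[test]
    fn seed_two_file_rebuild_shape_matches_its_documentation() {
        let (rebuild, _file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(3);
        assert_eq!(file_a_nodes.len(), 3);
        assert_eq!(file_b_nodes.len(), 3);
        assert_eq!(rebuild.nodes.len(), 6);
        assert_eq!(rebuild.edges.forward().delta().len(), 6);
    }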
2457
2458 #[test]
2459 fn rebuild_remove_file_tombstones_all_per_file_nodes() {
2460 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2461
2462 let returned = rebuild.remove_file(file_a);
2463
2464 let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
2465 let expected_set: std::collections::HashSet<NodeId> =
2466 file_a_nodes.iter().copied().collect();
2467 assert_eq!(
2468 returned_set, expected_set,
2469 "remove_file must return exactly the file_a nodes drained from the bucket"
2470 );
2471 // Each returned NodeId is arena-gone on the rebuild.
2472 for nid in &file_a_nodes {
2473 assert!(
2474 rebuild.nodes.get(*nid).is_none(),
2475 "node {nid:?} from removed file must be tombstoned on rebuild arena"
2476 );
2477 }
2478 // And staged for the finalize-time NodeIdBearing sweep.
2479 assert_eq!(rebuild.pending_tombstone_count(), file_a_nodes.len());
2480 }
2481
2482 #[test]
2483 fn rebuild_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
2484 let (mut rebuild, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(3);
2485
2486 // Seed produces 6 forward delta edges (2 intra-A + 2 intra-B
2487 // + 2 cross). The rebuild's own forward/reverse edge stores
2488 // mirror this.
2489 assert_eq!(rebuild.edges.forward().delta().len(), 6);
2490
2491 let _ = rebuild.remove_file(file_a);
2492
2493 // After removal: only 2 intra-B edges remain in each direction.
2494 assert_eq!(rebuild.edges.forward().delta().len(), 2);
2495 assert_eq!(rebuild.edges.reverse().delta().len(), 2);
2496
2497 // Cross-file edge b[0] -> a[0] must be gone from any direction.
2498 let b0 = file_b_nodes[0];
2499 let a0 = file_a_nodes[0];
2500 let from_b0: Vec<_> = rebuild
2501 .edges
2502 .edges_from(b0)
2503 .into_iter()
2504 .filter(|e| e.target == a0)
2505 .collect();
2506 assert!(
2507 from_b0.is_empty(),
2508 "edge b0 -> a0 must be gone after rebuild.remove_file(file_a)"
2509 );
2510 }
2511
2512 #[test]
2513 fn rebuild_remove_file_drops_file_registry_entry() {
2514 let (mut rebuild, file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2515
2516 assert!(rebuild.files.resolve(file_a).is_some());
2517 assert!(!rebuild.files.nodes_for_file(file_a).is_empty());
2518
2519 let _ = rebuild.remove_file(file_a);
2520
2521 assert!(
2522 rebuild.files.resolve(file_a).is_none(),
2523 "rebuild FileRegistry entry must be gone"
2524 );
2525 assert!(
2526 rebuild.files.nodes_for_file(file_a).is_empty(),
2527 "rebuild per-file bucket for file_a must be drained"
2528 );
2529 }
2530
2531 #[test]
2532 fn rebuild_remove_file_is_idempotent_on_unknown_file() {
2533 use crate::graph::unified::file::FileId;
2534 let (mut rebuild, _file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2535
2536 let nodes_before = rebuild.nodes.len();
2537 let delta_fwd_before = rebuild.edges.forward().delta().len();
2538 let delta_rev_before = rebuild.edges.reverse().delta().len();
2539 let tombstones_before = rebuild.pending_tombstone_count();
2540
2541 let bogus = FileId::new(9999);
2542 let returned = rebuild.remove_file(bogus);
2543 assert!(returned.is_empty());
2544
2545 assert_eq!(rebuild.nodes.len(), nodes_before);
2546 assert_eq!(rebuild.edges.forward().delta().len(), delta_fwd_before);
2547 assert_eq!(rebuild.edges.reverse().delta().len(), delta_rev_before);
2548 assert_eq!(rebuild.pending_tombstone_count(), tombstones_before);
2549 }
2550
2551 #[test]
2552 fn rebuild_remove_file_stages_tombstones_for_finalize_sweep() {
2553 // The whole point of RebuildGraph::remove_file deferring the
2554 // K.A/K.B sweep to finalize() is that the sweep happens exactly
2555 // once against the union of every file's tombstones. Drive this
2556 // end-to-end: remove both files, finalize, and assert the
2557 // publish-boundary invariants hold.
2558 let (mut rebuild, file_a, file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(2);
2559
2560 let _ = rebuild.remove_file(file_a);
2561 let _ = rebuild.remove_file(file_b);
2562
2563 // Pending tombstones: union of both files' nodes.
2564 assert_eq!(
2565 rebuild.pending_tombstone_count(),
2566 file_a_nodes.len() + file_b_nodes.len()
2567 );
2568
        // Do NOT run the pre-finalize residue check here. remove_file
        // only tombstones the arena and edge stores immediately; the
        // remaining NodeIdBearing K.A/K.B surfaces (auxiliary indices,
        // metadata stores, ...) are not touched until finalize, so
        // `assert_no_tombstone_residue`, which walks every surface,
        // would legitimately report residue at this point.
        //
        // Instead, confirm finalize runs successfully and that the
        // assembled CodeGraph passes the publish-boundary invariants
        // against the drained set; that is the real post-condition.
2583 let finalized = rebuild.finalize().expect("finalize must succeed");
2584
        // Every surface on the finalized CodeGraph must be clean of
        // the tombstoned ids. finalize's own step-14 call already ran
        // the residue check against the drained set, so here we only
        // re-verify the publish-boundary bucket bijection as a
        // belt-and-braces check.
2590 crate::graph::unified::publish::assert_publish_bijection(&finalized);
2591 // Arena is empty (every seeded node was tombstoned).
2592 assert_eq!(finalized.nodes().len(), 0);
2593 // No file registration survives.
2594 assert!(finalized.files().resolve(file_a).is_none());
2595 assert!(finalized.files().resolve(file_b).is_none());
2596 }
2597
2598 #[test]
2599 fn rebuild_remove_file_repeated_calls_are_idempotent() {
2600 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2601
2602 let first = rebuild.remove_file(file_a);
2603 assert_eq!(first.len(), file_a_nodes.len());
2604 let staged = rebuild.pending_tombstone_count();
2605
2606 // Second call: bucket is already drained, NodeArena::remove
2607 // ignores stale generations, and the tombstones set should not
2608 // grow. The immediate tombstone side effects are idempotent.
2609 let second = rebuild.remove_file(file_a);
2610 assert!(second.is_empty());
2611 assert_eq!(rebuild.pending_tombstone_count(), staged);
2612 }
2613
2614 #[test]
2615 fn rebuild_remove_file_clears_file_segments_entry() {
2616 // Iter-1 Codex review fix mirror of the CodeGraph-side test
2617 // in `concurrent/graph.rs`. Seed a segment for file A, invoke
2618 // `RebuildGraph::remove_file`, and assert both the
2619 // rebuild-local `file_segments` and the finalized `CodeGraph`'s
2620 // `file_segments` table carry no entry for file A. The second
2621 // half of the check is critical: `finalize()` step 12 publishes
2622 // `self.file_segments` verbatim, so a missing clear here would
2623 // leak the stale range into the publishable graph.
2625
2626 let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2627
2628 // Seed a segment for file A. Fish out the span from the
2629 // allocated NodeIds to mirror the shape `phase3_parallel_commit`
2630 // produces.
2631 let first_index = file_a_nodes
2632 .iter()
2633 .map(|n| n.index())
2634 .min()
2635 .expect("per_file = 3");
2636 let last_index = file_a_nodes
2637 .iter()
2638 .map(|n| n.index())
2639 .max()
2640 .expect("per_file = 3");
2641 let slot_count = last_index - first_index + 1;
2642 rebuild
2643 .file_segments
2644 .record_range(file_a, first_index, slot_count);
2645 assert!(
2646 rebuild.file_segments().get(file_a).is_some(),
2647 "seeded segment for file_a must be present before remove_file"
2648 );
2649
2650 // Act.
2651 let _ = rebuild.remove_file(file_a);
2652
2653 // Rebuild-local post-condition.
2654 assert!(
2655 rebuild.file_segments().get(file_a).is_none(),
2656 "RebuildGraph::remove_file must clear the file_segments entry"
2657 );
2658
2659 // Finalize post-condition: the published CodeGraph must also
2660 // carry no entry for file_a.
2661 let finalized = rebuild.finalize().expect("finalize must succeed");
2662 assert!(
2663 finalized.file_segments().get(file_a).is_none(),
2664 "finalize must publish a CodeGraph with no stale file_segments for file_a"
2665 );
2672 }
2673}