
sqry_core/graph/unified/rebuild/rebuild_graph.rs

//! [A2 §H] `RebuildGraph` + `clone_for_rebuild` + `finalize()` — Gate 0c.
//!
//! **This module is feature-gated.** Every item lives behind
//! `#[cfg(feature = "rebuild-internals")]`; only `sqry-daemon` is
//! whitelisted to enable the feature (see
//! `sqry-core/tests/rebuild_internals_whitelist.rs` and the plan at
//! `docs/superpowers/plans/2026-03-19-sqryd-daemon.md` §H "Placement
//! and feature gate").
//!
//! # What this module delivers
//!
//! Per the plan's A2 §H (pre-implementation Gate 0c, required before
//! any `clone_for_rebuild` caller in Task 4 Step 4 can exist):
//!
//! 1. [`sqry_graph_fields!`] — a **single source of truth** macro that
//!    enumerates every field on [`super::super::concurrent::CodeGraph`].
//!    It is invoked in three places by the rest of this module:
//!      - `sqry_graph_fields!(@decl_rebuild)` declares the
//!        [`RebuildGraph`] struct.
//!      - `sqry_graph_fields!(@clone_inner from self)` is expanded
//!        inside [`CodeGraph::clone_for_rebuild`]; the destructure
//!        `let CodeGraph { .. } = self;` is exhaustive, so adding a
//!        field to [`CodeGraph`] without adding it to the macro's
//!        field list is a hard `E0027` compile error.
//!      - `sqry_graph_fields!(@field_names)` emits a `&[&str]` of
//!        every field name, used by the Gate 0d bijection check
//!        (future) and by diagnostics.
//! 2. [`CodeGraph::clone_for_rebuild`] — deep-copies every Arc-wrapped
//!    field into an owned, rebuild-local [`RebuildGraph`]. The rebuild
//!    dispatcher (Task 4 Step 4) is the only caller.
//! 3. [`RebuildGraph::finalize`] — the canonical 14-step sequence
//!    spelled out in §H lines 658–707. Every step is either an API
//!    call on an owned component (freeze interner, compact arena,
//!    compact edges, compact indices, compact metadata, compact file
//!    buckets, compact K-list extras), a drain/move of scratch state
//!    (drain tombstones), a derived-state reset (rebuild CSR), a
//!    per-language metadata update (confidence), a scalar bump (epoch),
//!    a struct assembly (`Arc::new` wrapping), or a debug invariant
//!    (bucket bijection + tombstone residue).
//!
//! # Why the macro is kind-tagged, not a single callback
//!
//! The plan's literal example invokes `sqry_graph_fields!(CodeGraph,
//! RebuildGraph)` to generate both structs from one sibling macro
//! call. That shape would require `CodeGraph` to be declared by the
//! macro — but `CodeGraph` has dozens of `impl` blocks, Clone
//! semantics, and accessor methods that already destructure the fields
//! by name on stable Rust. Moving the declaration into a macro
//! expansion would obscure every one of those sites.
//!
//! A callback-based idiom (`sqry_graph_fields!(my_cb!)`) was also
//! considered; it worked for the struct declaration but collapsed
//! under `macro_rules!` hygiene constraints for the destructure-and-
//! construct clone path (the `self` keyword cannot be synthesised by
//! a macro; passing identifiers across a callback boundary requires
//! forwarding them through an `:expr` metavar, which loses the
//! token-level decomposition needed for Arc-vs-scalar dispatch).
//!
//! The **kind-tagged single macro** settled on here expands inline at
//! the call site — `sqry_graph_fields!(@clone_inner from self)` —
//! so the destructure runs in the caller's hygiene context, the
//! `self` keyword is the caller's `self`, and the per-field Arc /
//! scalar / owned dispatch is baked into the macro body without a
//! secondary callback hop.
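//!
//! A minimal sketch of the kind-tag idiom (the names here are
//! illustrative only — the real field list lives in
//! [`sqry_graph_fields!`] below):
//!
//! ```ignore
//! struct Source { a: u64, b: std::sync::Arc<String> }
//! struct Mirror { a: u64, b: String }
//!
//! macro_rules! fields {
//!     // One arm per consumer; the field rows appear once per arm.
//!     (@clone from $this:expr) => {{
//!         // Exhaustive destructure: adding a field to `Source`
//!         // without adding it here is a hard E0027 error.
//!         let Source { a, b } = $this;
//!         Mirror { a: *a, b: (**b).clone() }
//!     }};
//! }
//!
//! impl Source {
//!     fn mirror(&self) -> Mirror {
//!         // The destructure expands in this method's hygiene context,
//!         // so `self` here is the caller's `self`.
//!         fields!(@clone from self)
//!     }
//! }
//! ```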
//!
//! The single-source-of-truth guarantee is the same: adding a field
//! requires touching (a) the `CodeGraph` struct in
//! `concurrent/graph.rs`, (b) the field list inside this macro, and
//! (c) the step in `finalize()` that compacts it. Missing either
//! (a) or (b) makes `clone_for_rebuild_inner` fail to compile.
//!
//! # Release-build posture
//!
//! The debug bucket-bijection and tombstone-residue checks at steps 13
//! and 14 are gated on `cfg(any(debug_assertions, test))`. Release
//! builds pay zero overhead. The §E equivalence harness in CI is the
//! release-time drift backstop (plan §F "Release build semantics").

// Module-level `#[cfg(feature = "rebuild-internals")]` lives on the
// `pub mod rebuild_graph;` declaration in the parent `mod.rs`; duplicating
// it here trips the `duplicated_attributes` clippy lint.
#![allow(clippy::too_many_lines)]

use std::collections::{HashMap, HashSet};

use crate::confidence::ConfidenceMetadata;
use crate::graph::error::GraphResult;
use crate::graph::unified::bind::alias::AliasTable;
use crate::graph::unified::bind::scope::ScopeArena;
use crate::graph::unified::bind::scope::provenance::ScopeProvenanceStore;
use crate::graph::unified::bind::shadow::ShadowTable;
use crate::graph::unified::concurrent::CodeGraph;
use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
use crate::graph::unified::node::NodeId;
use crate::graph::unified::storage::arena::NodeArena;
use crate::graph::unified::storage::edge_provenance::EdgeProvenanceStore;
use crate::graph::unified::storage::indices::AuxiliaryIndices;
use crate::graph::unified::storage::interner::StringInterner;
use crate::graph::unified::storage::metadata::NodeMetadataStore;
use crate::graph::unified::storage::node_provenance::NodeProvenanceStore;
use crate::graph::unified::storage::registry::FileRegistry;
use crate::graph::unified::storage::segment::FileSegmentTable;

use super::coverage::NodeIdBearing;

// =====================================================================
// Single source of truth: field list
// =====================================================================

/// Single source of truth for the [`CodeGraph`] field list.
///
/// This macro takes a **kind-tag** selector (`@decl_rebuild`,
/// `@clone_inner from ...`, `@field_names`) and emits a tailored
/// expansion for each consumer. The field-list rows live exactly once,
/// inside this macro body, so any new field is added in one place:
/// here.
///
/// # Adding / renaming / removing a field on [`CodeGraph`]
///
/// 1. Edit the `CodeGraph` struct in
///    `sqry-core/src/graph/unified/concurrent/graph.rs` (declaration
///    order matters — match the order here).
/// 2. Edit the field list below. The `@decl_rebuild` arm auto-mirrors
///    into `RebuildGraph`; the `@clone_inner` arm auto-mirrors into
///    `clone_for_rebuild_inner`.
/// 3. Extend `RebuildGraph::finalize()` with the step that compacts
///    the new field (and, if it carries `NodeId`s, add a K.A/K.B row
///    + `NodeIdBearing` impl in `super::coverage`).
///
/// Missing step (1) or (2) is a hard `E0027` compile error in the
/// `let CodeGraph { .. } = ...` destructure inside the `@clone_inner`
/// arm. Step (3) is enforced by the §F tombstone-residue check
/// (Gate 0d) and by the §E incremental-vs-full equivalence harness.
#[macro_export]
macro_rules! sqry_graph_fields {
    // -------------------------------------------------------------
    // Arm @decl_rebuild: emit the `RebuildGraph` struct declaration.
    // -------------------------------------------------------------
    (@decl_rebuild) => {
        /// Owned, rebuild-local mirror of [`CodeGraph`].
        ///
        /// Each field stores a value type rather than an [`Arc`], so
        /// the rebuild task can mutate freely without touching the
        /// live published graph. The only path to produce a
        /// publishable [`CodeGraph`] from a [`RebuildGraph`] is
        /// [`RebuildGraph::finalize`]; the trybuild fixture at
        /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
        /// locks that invariant in.
        pub struct RebuildGraph {
            pub(crate) nodes: NodeArena,
            pub(crate) edges: BidirectionalEdgeStore,
            pub(crate) strings: StringInterner,
            pub(crate) files: FileRegistry,
            pub(crate) indices: AuxiliaryIndices,
            pub(crate) macro_metadata: NodeMetadataStore,
            pub(crate) node_provenance: NodeProvenanceStore,
            pub(crate) edge_provenance: EdgeProvenanceStore,
            pub(crate) fact_epoch: u64,
            pub(crate) epoch: u64,
            pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
            pub(crate) scope_arena: ScopeArena,
            pub(crate) alias_table: AliasTable,
            pub(crate) shadow_table: ShadowTable,
            pub(crate) scope_provenance_store: ScopeProvenanceStore,
            pub(crate) file_segments: FileSegmentTable,
            /// Active tombstone set during finalize steps 2–7.
            /// Populated by `RebuildGraph::remove_file` /
            /// `RebuildGraph::tombstone` (added by Task 4 Step 4) and
            /// drained into [`drained_tombstones`] at step 8.
            pub(crate) tombstones: HashSet<NodeId>,
            /// Snapshot of [`tombstones`] taken at finalize step 8,
            /// kept for the debug residue check in step 14.
            pub(crate) drained_tombstones: HashSet<NodeId>,
        }
    };

    // -------------------------------------------------------------
    // Arm @clone_inner: emit the body of `clone_for_rebuild_inner`.
    //
    // The caller passes their `self` binding as `$this:expr` so
    // macro hygiene preserves the method-local reference. The
    // destructure is exhaustive — adding a `CodeGraph` field without
    // also adding it here is a hard `E0027` compile error. Removing
    // a row here without removing the `CodeGraph` field is an unused-
    // pattern warning that our `-D warnings` CI setting escalates.
    // -------------------------------------------------------------
    (@clone_inner from $this:expr) => {{
        let CodeGraph {
            nodes,
            edges,
            strings,
            files,
            indices,
            macro_metadata,
            node_provenance,
            edge_provenance,
            fact_epoch,
            epoch,
            confidence,
            scope_arena,
            alias_table,
            shadow_table,
            scope_provenance_store,
            file_segments,
        } = $this;
        RebuildGraph {
            nodes: (**nodes).clone(),
            edges: (**edges).clone(),
            strings: (**strings).clone(),
            files: (**files).clone(),
            indices: (**indices).clone(),
            macro_metadata: (**macro_metadata).clone(),
            node_provenance: (**node_provenance).clone(),
            edge_provenance: (**edge_provenance).clone(),
            fact_epoch: *fact_epoch,
            epoch: *epoch,
            confidence: confidence.clone(),
            scope_arena: (**scope_arena).clone(),
            alias_table: (**alias_table).clone(),
            shadow_table: (**shadow_table).clone(),
            scope_provenance_store: (**scope_provenance_store).clone(),
            file_segments: (**file_segments).clone(),
            tombstones: ::std::collections::HashSet::new(),
            drained_tombstones: ::std::collections::HashSet::new(),
        }
    }};

    // -------------------------------------------------------------
    // Arm @field_names: emit a comma-separated list of every field
    // name. Used by tests that want to assert coverage of field names
    // against a declared list without re-parsing Rust source.
    // -------------------------------------------------------------
    (@field_names) => {
        &[
            "nodes",
            "edges",
            "strings",
            "files",
            "indices",
            "macro_metadata",
            "node_provenance",
            "edge_provenance",
            "fact_epoch",
            "epoch",
            "confidence",
            "scope_arena",
            "alias_table",
            "shadow_table",
            "scope_provenance_store",
            "file_segments",
        ]
    };
}

// The macro name resolves through `$crate::sqry_graph_fields`; re-export
// for intra-module call sites that prefer the unqualified name.
pub(crate) use sqry_graph_fields;
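
// A hedged sketch of how the `@field_names` arm is meant to be consumed
// by the (future) Gate 0d bijection check; the test name below is
// illustrative, not an existing test:
//
//     #[test]
//     fn field_names_match_code_graph_field_count() {
//         let names: &[&str] = sqry_graph_fields!(@field_names);
//         // 16 fields are enumerated by the macro today; `RebuildGraph`
//         // adds `tombstones` / `drained_tombstones` on top of these.
//         assert_eq!(names.len(), 16);
//     }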

// =====================================================================
// RebuildGraph struct declaration (macro-driven)
// =====================================================================

sqry_graph_fields!(@decl_rebuild);

// =====================================================================
// clone_for_rebuild: CodeGraph -> RebuildGraph
// =====================================================================

impl CodeGraph {
    /// Produce a fresh [`RebuildGraph`] from this graph's current
    /// committed state, deep-cloning every Arc-wrapped component so the
    /// returned value is decoupled from future mutations to `self`.
    ///
    /// The exhaustive destructure inside the `@clone_inner` arm of
    /// [`sqry_graph_fields!`] guarantees that every field on
    /// [`CodeGraph`] is mirrored into the returned [`RebuildGraph`].
    /// Adding a field to [`CodeGraph`] without also adding it to the
    /// field list inside `sqry_graph_fields!` is a hard `E0027`
    /// compile error on the `let CodeGraph { .. } = self;` destructure.
    ///
    /// # When to call this
    ///
    /// Only the incremental-rebuild dispatcher (Task 4 Step 4) calls
    /// `clone_for_rebuild`, on the rebuild task's background tokio
    /// context, against an `Arc<CodeGraph>` freshly obtained via
    /// `ArcSwap::load_full()`. The Arc's refcount is 1 in the common
    /// case, so the underlying deep clones amount to `Arc::get_mut`-
    /// equivalent cost on each component.
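    ///
    /// A minimal sketch of the intended call pattern (the `graph_slot:
    /// ArcSwap<CodeGraph>` binding is illustrative; the real dispatcher
    /// arrives with Task 4 Step 4):
    ///
    /// ```ignore
    /// let snapshot: std::sync::Arc<CodeGraph> = graph_slot.load_full();
    /// // Deep-copy into an owned, rebuild-local mirror; the published
    /// // graph behind `graph_slot` is never touched by the rebuild.
    /// let rebuild: RebuildGraph = snapshot.clone_for_rebuild();
    /// ```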
    ///
    /// # Performance budget (A2 §H, line 734)
    ///
    /// On a 384k-node / 1.3M-edge reference graph, this call must
    /// complete in < 50 ms. The daemon's rebuild-latency benchmark
    /// tracks the budget; warning threshold 50 ms, hard record
    /// threshold 200 ms. Exceeding the warning logs but does not fail
    /// the rebuild.
    #[must_use]
    pub fn clone_for_rebuild(&self) -> RebuildGraph {
        Self::clone_for_rebuild_inner(self)
    }

    /// Inner implementation, kept separate so the public entrypoint is
    /// a single-line shim that also makes the exhaustive destructure
    /// visible at the top of the rebuild surface.
    ///
    /// The `@clone_inner from self` macro arm emits the destructure +
    /// struct-construction inline; passing `self` as an `:expr`
    /// metavar preserves call-site hygiene so the macro-introduced
    /// field bindings (`nodes`, `edges`, ...) are resolvable against
    /// the `self` binding in this method.
    fn clone_for_rebuild_inner(&self) -> RebuildGraph {
        sqry_graph_fields!(@clone_inner from self)
    }
}

// =====================================================================
// finalize(): RebuildGraph -> CodeGraph — the 14-step contract
// =====================================================================

impl RebuildGraph {
    /// Consume this [`RebuildGraph`] and assemble a publishable
    /// [`CodeGraph`].
    ///
    /// This is the **only** safe path from [`RebuildGraph`] back to
    /// [`CodeGraph`]. No public API converts a [`RebuildGraph`] into
    /// an [`Arc<CodeGraph>`] any other way, and no
    /// `From<RebuildGraph> for CodeGraph` impl exists — the trybuild
    /// fixture at
    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
    /// locks that property in.
    ///
    /// # The 14 ordered steps (plan §H lines 658–707)
    ///
    /// Each numbered comment below traces the matching plan step. In
    /// debug / test builds, steps 13 and 14 also execute the §F
    /// bijection and tombstone-residue checks; release builds compile
    /// them out.
    ///
    /// Steps 2–7 uniformly invoke
    /// [`super::coverage::NodeIdBearing::retain_nodes`] with the
    /// closure `|nid| !self.tombstones.contains(&nid)`, so adding a new
    /// K.A or K.B row for a future `NodeId`-bearing container
    /// automatically extends compaction coverage once the new row's
    /// `retain_nodes` call is appended to step 7.
    ///
    /// # Errors
    ///
    /// Returns `Err(GraphBuilderError::Internal{..})` only if a
    /// compaction primitive fails. Today the only fallible primitive is
    /// the step 9 CSR build, whose error path is not expected to trigger
    /// on well-formed rebuild state, so `finalize()` should not error in
    /// practice. The `Result` return is preserved so future fallible
    /// compaction steps need no signature change.
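    ///
    /// A minimal end-to-end sketch (the `graph` and `node_id` bindings
    /// are illustrative):
    ///
    /// ```ignore
    /// let mut rebuild = graph.clone_for_rebuild();
    /// rebuild.tombstone(node_id);            // stage a NodeId for removal
    /// let new_graph = rebuild.finalize()?;   // runs the 14-step sequence
    /// let published = std::sync::Arc::new(new_graph); // caller-side publish
    /// ```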
    pub fn finalize(mut self) -> GraphResult<CodeGraph> {
        // ----------------------------------------------------------------
        // Step 1 — Freeze the rebuild's interner.
        //
        // The plan §H describes step 1 as `new_strings =
        // self.string_builder.freeze()`. In this codebase the
        // rebuild-local interner is a [`StringInterner`] (no separate
        // "builder" type), so the concrete freeze operation is:
        //
        //   (a) canonicalise the interner by running
        //       [`StringInterner::build_dedup_table`]. This guarantees:
        //         * `lookup_stale == false` (required for all read-path
        //           accessors); any in-flight bulk-alloc residue from a
        //           prior failed rebuild is healed.
        //         * Every string value is backed by exactly one
        //           canonical slot — no duplicate arcs remain. The
        //           `ref_counts` of duplicate slots are folded into
        //           their canonical slot. `StringId` values held by
        //           live `NodeEntry`s remain valid because this pass
        //           only collapses slots that are *not* referenced from
        //           live nodes (nothing in this pass renames already
        //           live slots).
        //   (b) rewrite every live `NodeEntry` through the remap table
        //       produced by (a) so any node whose fields happen to
        //       point at a now-collapsed duplicate slot is rewired to
        //       the canonical slot. This preserves the post-freeze
        //       invariant "every live `NodeEntry` references only
        //       canonical `StringId`s" even if earlier rebuild steps
        //       produced duplicate slots.
        //   (c) prune unreferenced slots with
        //       [`StringInterner::recycle_unreferenced`] so the frozen
        //       interner carries only strings the final graph actually
        //       needs. This is the step that drives
        //       `interner_live_ratio < interner_compaction_threshold`
        //       down over time (plan §H line 713) so the next
        //       housekeeping rebuild observes an accurate live ratio.
        //
        // After this block runs, `self.strings` is in a fully
        // canonical, shared-immutable-ready state. Step 12's
        // `Arc::new(new_strings)` is then purely the ownership-to-
        // shared transition; the semantic freeze itself has already
        // happened here.
        // ----------------------------------------------------------------
        let string_remap = self.strings.build_dedup_table();
        if !string_remap.is_empty() {
            // Rewrite every live node's interned-string fields through
            // the remap so freshly-collapsed duplicate slots are not
            // reachable from any live `NodeEntry`. This is required
            // for (c) below to be safe to call: `recycle_unreferenced`
            // only frees slots with `ref_count == 0`, so the refcount
            // bookkeeping that `build_dedup_table` consolidated into
            // the canonical slot must be honoured by node-level fields
            // as well.
            rewrite_node_entries_through_remap(&mut self.nodes, &string_remap);
            // Iter-2 B1 (verbatim): `AuxiliaryIndices::name_index` /
            // `qualified_name_index` are keyed by `StringId`. If
            // `build_dedup_table()` collapses a key's slot and
            // `recycle_unreferenced` subsequently frees it, the bucket
            // keys would dangle. Remap every `StringId`-backed holder
            // on the rebuild through the same dedup table before the
            // recycle pass runs, merging any buckets that collapse onto
            // the same canonical key.
            //
            // All remap-capable stores are listed here exhaustively.
            // Extending this block when a new `CodeGraph` field gains a
            // `StringId` payload is a code-owner obligation tied to the
            // `sqry_graph_fields!` macro (plan §H "Placement and feature
            // gate"). Today, the StringId-bearing surfaces on the
            // publish-visible graph are:
            //
            //   * `AuxiliaryIndices.name_index` / `qualified_name_index`
            //     (BTreeMap keys — collapse on canonicalisation)
            //   * `FileRegistry` — `FileEntry.source_uri: Option<StringId>`
            //     per slot
            //   * `AliasTable` — `AliasEntry.from_symbol` /
            //     `to_symbol` per entry; the `(scope, from_symbol)`
            //     sort key is re-established after rewrite
            //   * `ShadowTable` — `ShadowEntry.symbol` per entry; the
            //     `(scope, symbol, byte_offset)` sort key and the
            //     `chains` range index are rebuilt after rewrite
            //
            // `NodeArena` (above) is the fifth surface; it is handled
            // inline because it is the single largest consumer and
            // already has a purpose-built helper.
            //
            // Iter-3 B1 (verbatim): `BidirectionalEdgeStore` holds
            // `EdgeKind` instances whose variants — `Imports{alias}`,
            // `Exports{alias}`, `TypeOf{name}`, `TraitMethodBinding
            // {trait_name, impl_type}`, `HttpRequest{url}`,
            // `GrpcCall{service, method}`, `DbQuery{table}`,
            // `TableRead{table_name, schema}`, `TableWrite{table_name,
            // schema}`, `TriggeredBy{trigger_name, schema}`,
            // `MessageQueue{protocol::Other(_), topic}`,
            // `WebSocket{event}`, `GraphQLOperation{operation}`,
            // `ProcessExec{command}`, `FileIpc{path_pattern}`,
            // `ProtocolCall{protocol, metadata}` — carry
            // `StringId` payloads inside both the steady-state CSR
            // (`CsrGraph::edge_kind`) and the mutable delta
            // (`DeltaBuffer` `DeltaEdge::kind`), in **both** the
            // forward and reverse stores. Before this iter-4 fix,
            // step 1 left those payloads un-remapped — so after
            // `recycle_unreferenced` below, those edges would have
            // dangled onto freed interner slots. The full-build
            // pipeline recognises this surface explicitly at
            // `build/entrypoint.rs` (Phase 4b) +
            // `build/parallel_commit.rs::phase4_apply_global_remap`;
            // the rebuild pipeline must do the same against
            // committed storage because, unlike Phase 4b, the edges
            // are no longer a `Vec<Vec<PendingEdge>>`.
            //
            // `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
            // is the sixth surface on this list. The exhaustive match
            // on `EdgeKind` lives in
            // `parallel_commit::remap_edge_kind_string_ids` so adding a
            // new `StringId`-bearing variant becomes a single source of
            // truth: a compile error there forces both pipelines to
            // rewrite it.
            //
            // Stores whose data is keyed by `NodeId`/`FileId`/`EdgeId`
            // only (metadata keyed on NodeId, node / edge provenance,
            // scope arena indexed by ScopeId, scope provenance,
            // file-segments keyed on FileId/EdgeId) do not hold
            // `StringId` payloads and are out of scope for this
            // rewrite. Any future field that lifts a `StringId` onto
            // `CodeGraph` must extend this list in the same commit
            // that declares it, matching the A2 §K discipline.
            self.indices.rewrite_string_ids_through_remap(&string_remap);
            self.files.rewrite_string_ids_through_remap(&string_remap);
            self.alias_table
                .rewrite_string_ids_through_remap(&string_remap);
            self.shadow_table
                .rewrite_string_ids_through_remap(&string_remap);
            // Iter-4 K.B1 fix: rewrite `StringId` payloads carried by
            // committed `EdgeKind`s in both forward and reverse stores,
            // across both CSR and delta tiers. Must run before
            // `recycle_unreferenced` below so freed slots stay unreferenced.
            self.edges
                .rewrite_edge_kind_string_ids_through_remap(&string_remap);
        }
        self.strings.recycle_unreferenced();

        // ----------------------------------------------------------------
        // Step 2 — Compact NodeArena.
        //
        // K.A1 row: `NodeArena::retain_nodes` drops every occupied slot
        // whose NodeId fails the predicate, advancing the slot's
        // generation so lingering NodeId handles become stale.
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.nodes.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 3 — Compact BidirectionalEdgeStore (forward + reverse).
        //
        // K.A2 + K.A3 rows: `retain_nodes` drops every delta edge with
        // a tombstoned endpoint in either direction. CSR compaction is
        // deferred to step 9 (CSR is derived state, rebuilt not mutated).
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.edges.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 4 — Compact AuxiliaryIndices.
        //
        // K.A4–K.A7 rows (kind / name / qualified-name / file indices):
        // a single `AuxiliaryIndices::retain_nodes` call visits every
        // inner bucket.
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.indices.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 5 — Compact NodeMetadataStore.
        //
        // K.A8 row: macro / classpath per-node metadata keyed by the
        // full `(index, generation)` pair.
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.macro_metadata.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 6 — Compact FileRegistry per-file buckets.
        //
        // K.B1 row. Iter-2 B2 (verbatim): this step used to be a
        // no-op scheduled against a future base-plan Step 1. Pulling
        // `per_file_nodes: HashMap<FileId, Vec<NodeId>>` forward into
        // Gate 0c retires the no-op — every `NodeId` the rebuild's
        // parallel-parse pass committed is already bucketed via
        // `FileRegistry::record_node`, so this step now does real work:
        //
        //   * drop every `NodeId` whose arena slot was tombstoned in
        //     step 2 (or earlier) via the shared predicate
        //   * dedup each surviving bucket
        //   * drop buckets that collapse to empty
        //
        // The §F.1 bucket-bijection check at step 13 consumes the
        // result of this compaction, so a mis-applied predicate here
        // surfaces as a panic at the publish boundary in debug builds.
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.files.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 7 — Compact K-list extras.
        //
        // Rows K.A10 (node_provenance), K.A11 (scope_arena),
        // K.A12 (alias_table), K.A13 (shadow_table). Each is a direct
        // NodeIdBearing::retain_nodes call.
        //
        // When a future task lifts a new NodeId-bearing structure onto
        // `CodeGraph`, extend this step with one additional
        // `retain_nodes` call and add a K.A/K.B row to
        // `super::coverage` per the plan §K contract.
        // ----------------------------------------------------------------
        {
            let tombstones = &self.tombstones;
            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
                Box::new(move |nid| !tombstones.contains(&nid));
            self.node_provenance.retain_nodes(&*predicate);
            self.scope_arena.retain_nodes(&*predicate);
            self.alias_table.retain_nodes(&*predicate);
            self.shadow_table.retain_nodes(&*predicate);
        }

        // ----------------------------------------------------------------
        // Step 8 — Drain tombstones into `drained_tombstones`.
        //
        // The residue check at step 14 asserts that the finalized
        // `CodeGraph` does not contain any of these NodeIds in any
        // NodeId-bearing structure. Keep the drained set until the
        // check completes; then it is dropped with `self`.
        // ----------------------------------------------------------------
        self.drained_tombstones = std::mem::take(&mut self.tombstones);

        // ----------------------------------------------------------------
        // Step 9 — Rebuild CSR adjacency (both directions).
        //
        // CSR is derived state (K.A9 — "CSR adjacency is derived state;
        // rebuilt from compacted edges — never mutated in place"). After
        // step 3 filtered the delta tier to live-only endpoints, the
        // correct operation is to *construct* a fresh CSR from the
        // compacted delta snapshot and *install* it — not merely drop
        // the stale cache. This matches plan §H lines 684–691 (the
        // committed-then-queried graph has a CSR from the first read).
        //
        // Build path: `compaction::snapshot_edges` → compaction merges
        // in-memory delta (no tombstones left after step 3) →
        // `build_compacted_csr` constructs an immutable `CsrGraph` →
        // `swap_csrs_and_clear_deltas` installs both directions and
        // clears the now-absorbed delta buffers. The node_count is the
        // post-compaction slot count of the arena (not the live count —
        // CSR uses dense slot indexing; vacant slots appear as nodes
        // with zero out-edges).
        //
        // The plan's example spells this `self.edges.rebuild_csr()` as
        // shorthand for this sequence; we expand it to concrete calls
        // so reviewers can audit each primitive. The post-condition is
        // identical to the plan's: after step 9 the `CodeGraph` about
        // to be assembled has no CSR referencing a tombstoned NodeId,
        // because the CSR was built from the already-filtered delta.
        // ----------------------------------------------------------------
        {
            use crate::graph::unified::compaction::{
                Direction, build_compacted_csr, snapshot_edges,
            };
            let node_count = self.nodes.slot_count();
            // Snapshot the forward/reverse deltas (CSR is still stale
            // at this point — `build_compacted_csr` uses the delta's
            // live-only contents). Because step 3 tombstoned endpoints
            // out of the delta already, the merged build sees only
            // live edges.
            let forward_snapshot = {
                let forward = self.edges.forward();
                snapshot_edges(&forward, node_count)
            };
            let reverse_snapshot = {
                let reverse = self.edges.reverse();
                snapshot_edges(&reverse, node_count)
            };
            // Build both directions in parallel (matches the pattern
            // used by `build/entrypoint.rs`).
            let (forward_csr, reverse_csr) = rayon::join(
                || build_compacted_csr(&forward_snapshot, Direction::Forward),
                || build_compacted_csr(&reverse_snapshot, Direction::Reverse),
            );
            let (forward_csr, _) =
                forward_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
                    reason: format!("rebuild finalize step 9 (forward CSR build): {e}"),
                })?;
            let (reverse_csr, _) =
                reverse_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
                    reason: format!("rebuild finalize step 9 (reverse CSR build): {e}"),
                })?;
            // Install both CSRs and clear the absorbed deltas.
            self.edges
                .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
        }

        // ----------------------------------------------------------------
        // Step 10 — Per-language confidence update.
        //
        // Incremental rebuild must ensure the published confidence map
        // matches the set of languages that actually have live nodes in
        // the rebuild's graph. Languages whose only source files were
        // all removed during this rebuild must not linger in the
        // confidence surface — they would mislead MCP clients into
        // believing analysis for that language is still available.
        //
        // The rebuild-local `FileRegistry` carries per-file `Language`
        // tags; we enumerate them, collect the set of languages with at
        // least one live file, and drop confidence entries for any
        // language that no longer appears. Entries for still-present
        // languages are preserved as-is (their `ConfidenceMetadata` is
        // updated by the plugin-level confidence-ingestion path in
        // `CodeGraph::merge_confidence`; this step only filters, it
        // does not fabricate new samples).
        //
        // A language entry is preserved when (a) at least one live file
        // is tagged with that language in the rebuild-local registry,
        // OR (b) the confidence entry carries a non-default set of
        // limitations / unavailable-features — that is analysis
        // metadata the plugin layer deliberately recorded, and dropping
        // it on a zero-file rebuild would be lossy. Both conditions are
        // evaluated against the rebuild-local state that will be
        // published by step 12.
        // ----------------------------------------------------------------
        {
            use std::collections::BTreeSet;
            let mut active_languages: BTreeSet<String> = BTreeSet::new();
            for (_file_id, _path, maybe_language) in self.files.iter_with_language() {
                if let Some(language) = maybe_language {
                    active_languages.insert(language.to_string());
                }
            }
            self.confidence.retain(|language_key, meta| {
                if active_languages.contains(language_key) {
                    true
                } else {
                    // Preserve entries that encode deliberate
                    // plugin-recorded limitations even when no live
                    // files remain — those are analysis-state facts
                    // the daemon must not silently drop.
                    !meta.limitations.is_empty() || !meta.unavailable_features.is_empty()
                }
            });
        }

        // ----------------------------------------------------------------
        // Step 11 — Epoch bump.
        //
        // `prior_epoch` captured at `clone_for_rebuild` time; +1 marks
        // the publication boundary. Wrapping add matches the existing
        // `CodeGraph::bump_epoch` behavior at
        // `sqry-core/src/graph/unified/concurrent/graph.rs`.
        // ----------------------------------------------------------------
        let new_epoch = self.epoch.wrapping_add(1);

        // ----------------------------------------------------------------
        // Step 12 — Assemble the immutable `CodeGraph`.
        //
        // Every Arc-wrapped field is freshly wrapped from the
        // rebuild-local owned value, so the new `CodeGraph` has no
        // Arc-sharing relationship with the pre-rebuild snapshot. This
        // is the point at which the rebuild-local interner becomes
        // immutable (step 1's freeze).
        // ----------------------------------------------------------------
        let graph = CodeGraph::__assemble_from_rebuild_parts_internal(
            self.nodes,
            self.edges,
            self.strings,
            self.files,
            self.indices,
            self.macro_metadata,
            self.node_provenance,
            self.edge_provenance,
            self.fact_epoch,
            new_epoch,
            self.confidence,
            self.scope_arena,
            self.alias_table,
            self.shadow_table,
            self.scope_provenance_store,
            self.file_segments,
        );

        // ----------------------------------------------------------------
        // Steps 13 + 14 — (debug) Publish-boundary invariants.
        //
        // Per plan §F.3 "single source of truth", both the §F.1 bucket
        // bijection and the §F.2 tombstone-residue checks fire through
        // the canonical `publish::assert_publish_invariants` helper.
        // That helper is also called at the full-rebuild end
        // (`build_unified_graph_inner`), by Task 6's
        // `WorkspaceManager::publish_graph`, and by every §E harness
        // iteration — so any invariant drift surfaces in all four
        // places simultaneously, not in a subset.
        //
        // The §F.2 assertion is the **single site** against
        // `self.drained_tombstones` (populated at step 8); we do not
        // run the residue check anywhere else.
        // ----------------------------------------------------------------
        #[cfg(any(debug_assertions, test))]
        crate::graph::unified::publish::assert_publish_invariants(&graph, &self.drained_tombstones);

        Ok(graph)
    }

    /// Returns the number of NodeIds currently staged for tombstoning
    /// via [`RebuildGraph::tombstone`] or [`RebuildGraph::remove_file`].
    /// Useful for Gate 0c tests that exercise finalize with an empty
    /// or non-empty tombstone set.
    #[must_use]
    pub fn pending_tombstone_count(&self) -> usize {
        self.tombstones.len()
    }

    /// Shared-reference accessor for the rebuild-local
    /// [`NodeArena`]. Read-only; the writer is the rebuild dispatcher
    /// which holds `&mut self`.
    ///
    /// Used by sqry-daemon's `WorkspaceManager` (Task 6) and by the
    /// Task 4 Step 4 scale test to inspect arena state between
    /// [`remove_file`](Self::remove_file) calls without going through
    /// the pub(crate) field directly.
    #[must_use]
    pub fn nodes(&self) -> &NodeArena {
        &self.nodes
    }

    /// Shared-reference accessor for the rebuild-local
    /// [`FileRegistry`]. Read-only. The rebuild dispatcher writes via
    /// `&mut self` on [`remove_file`](Self::remove_file) and
    /// finalization.
    #[must_use]
    pub fn files(&self) -> &FileRegistry {
        &self.files
    }

    /// Shared-reference accessor for the rebuild-local
    /// [`FileSegmentTable`]. Read-only.
    ///
    /// `FileSegmentTable` is a plain `Vec<Option<FileSegment>>` with no
    /// interior mutability (see
    /// `sqry-core/src/graph/unified/storage/segment.rs`), so this
    /// accessor is genuinely read-only. Writers ride through the
    /// `&mut self` mutation path inside
    /// [`remove_file`](Self::remove_file) and
    /// [`finalize`](Self::finalize).
    ///
    /// Added as part of the iter-1 Codex review fix for Task 4
    /// Steps 2-3: the `remove_file` integration tests need to assert
    /// that a file's segment entry is cleared after removal, closing
    /// the FileId-recycle stale-range bug documented at
    /// `sqry-core/tests/incremental_remove_file_scale.rs`.
    #[must_use]
    pub fn file_segments(&self) -> &FileSegmentTable {
        &self.file_segments
    }

    // Note (iter-1 review fix): a `pub fn edges(&self) -> &BidirectionalEdgeStore`
    // accessor was removed. `BidirectionalEdgeStore` exposes
    // `forward_mut(&self)` and `reverse_mut(&self)` (interior mutability),
    // so returning `&BidirectionalEdgeStore` from `&self` would let
    // external callers escalate a "read-only" handle into a writer. The
    // rebuild contract requires that edge mutation only happens inside
    // `RebuildGraph::remove_file` / `finalize` on `&mut self`. If a
    // read-only edge-view accessor is ever needed by a legitimate
    // consumer, add a wrapper struct (e.g., `BidirectionalEdgeStoreView`)
    // that exposes only `forward()` / `reverse()` / `edges_from` /
    // `edges_to` / `stats` on `&self` — never the `*_mut` methods.

    /// Stage a NodeId for tombstoning during the next `finalize` pass.
    ///
    /// Gate 0c ships this helper so finalize tests can drive the
    /// 14-step contract with a realistic tombstone set without waiting
    /// for Task 4 Step 4's `remove_file` plumbing. Production callers
    /// will route through `RebuildGraph::remove_file` (Task 4 Step 4),
    /// which internally populates the same set via
    /// `FileRegistry::take_nodes`.
    pub fn tombstone(&mut self, id: NodeId) {
        self.tombstones.insert(id);
    }

    /// Stage every `NodeId` in `ids` for tombstoning during the next
    /// `finalize` pass. Equivalent to calling [`tombstone`](Self::tombstone)
    /// for each id, but expresses the bulk intent at the call site.
    ///
    /// Used by [`remove_file`](Self::remove_file) to fold every
    /// file-local NodeId into the staged set in a single pass.
    pub(crate) fn tombstone_many<I: IntoIterator<Item = NodeId>>(&mut self, ids: I) {
        self.tombstones.extend(ids);
    }

    /// Drain every `NodeId` belonging to `file_id`, invalidate every
    /// rebuild-local edge whose source or target is one of those nodes
    /// (across both forward and reverse CSR + delta tiers of the
    /// rebuild-local edge store), drop the file's entry from the
    /// rebuild-local [`FileRegistry`], and return the list of
    /// tombstoned [`NodeId`]s.
    ///
    /// This is the rebuild-side mirror of
    /// [`super::super::concurrent::CodeGraph::remove_file`]. Whereas
    /// the `CodeGraph` variant mutates a live publishable graph in
    /// place (O(1) publish once the caller wraps it in an `Arc`), this
    /// variant operates on the owned, pre-finalize
    /// [`RebuildGraph`] state — so the edge-store tombstones land in
    /// the CSR's tombstone bitmap and the delta buffer, and are later
    /// physically purged by [`finalize`](Self::finalize) step 9 when
    /// the CSR is rebuilt from the compacted delta (§H lines 684–691).
    ///
    /// The method does **not** rewrite `NodeIdBearing` surfaces on the
    /// rebuild (NodeArena, AuxiliaryIndices, NodeMetadataStore,
    /// NodeProvenanceStore, ScopeArena, AliasTable, ShadowTable) —
    /// those surfaces are compacted once, uniformly, by `finalize()`
    /// steps 2–7 against the accumulated `tombstones` set. Running a
    /// partial compaction here would duplicate work and violate the
    /// plan's "compact once at step N" contract.
    ///
    /// Instead, the tombstoned `NodeId`s are accumulated in
    /// `self.tombstones` via [`tombstone_many`](Self::tombstone_many).
    /// Successive `remove_file` calls against different files
    /// accumulate into the same set; `finalize()` then sweeps every
    /// K.A/K.B surface once with the union predicate.
    ///
    /// # What is tombstoned immediately
    ///
    /// Only four rebuild-local surfaces are mutated before `finalize()` runs:
    ///
    /// * **NodeArena**: each file-local `NodeId` is `remove`d so the
    ///   slot's generation advances and stale handles cannot alias a
    ///   re-allocation. Downstream compaction at step 2 is then a
    ///   no-op for these slots (idempotent; the arena skips stale
    ///   generations).
    /// * **Edge store** (both forward + reverse, both CSR + delta):
    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`] kills
    ///   every edge whose source or target is one of the drained
    ///   NodeIds. CSR tombstones land in `csr_tombstones`; delta
    ///   edges are dropped outright. Step 9 of `finalize` then
    ///   rebuilds a fresh CSR from the tombstone-free delta, so the
    ///   CSR tombstones become physically invisible by publish time.
    /// * **FileRegistry**: the bucket is drained via
    ///   [`FileRegistry::take_nodes`] and the file entry is
    ///   deregistered via [`FileRegistry::unregister`]. Both are
    ///   idempotent on an already-removed file.
    /// * **FileSegmentTable**: the file's segment entry is cleared via
    ///   `FileSegmentTable::remove` (idempotent on unknown ids) so
    ///   step 12 cannot publish a stale range against a recycled
    ///   `FileId`.
    ///
    /// # Returned value
    ///
    /// The list of NodeIds that were staged for tombstoning. Empty on
    /// an unknown or already-removed file. Useful for Gate 0c finalize
    /// tests that need to assert on bucket-drain correctness or on the
    /// union of staged tombstones across several `remove_file` calls.
    ///
    /// # Idempotency
    ///
    /// Calling twice with the same `file_id` is a no-op on the second
    /// call. The bucket is already drained, `take_nodes` returns an
    /// empty `Vec`, the rest of the method short-circuits, and the
    /// `tombstones` set is unchanged.
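    ///
    /// A minimal sketch of the per-file removal loop (the `changed_files`
    /// iterator is illustrative):
    ///
    /// ```ignore
    /// for file_id in changed_files {
    ///     // Returns the NodeIds staged for tombstoning; empty for
    ///     // unknown or already-removed files.
    ///     let _staged = rebuild.remove_file(file_id);
    /// }
    /// // One finalize() sweeps the accumulated tombstone set across all files.
    /// let graph = rebuild.finalize()?;
    /// ```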
    #[allow(dead_code)] // Intra-crate consumer is Task 4 Step 4
    // (`incremental_rebuild`); external consumer is sqry-daemon's
    // Task 6 `WorkspaceManager` via the feature-gated `pub use` of
    // `RebuildGraph`. The visibility is `pub` (not `pub(crate)`) so
    // the daemon can drive file removals through the rebuild plane,
    // which is the canonical path for file-deletion events per §F.2.
    pub fn remove_file(&mut self, file_id: super::super::file::FileId) -> Vec<NodeId> {
        // Drain the rebuild-local FileRegistry bucket.
        let tombstoned: Vec<NodeId> = self.files.take_nodes(file_id);
        // Deregister the file entry unconditionally (idempotent).
        self.files.unregister(file_id);
        // Clear the rebuild-local `FileSegmentTable` entry for this
        // file (idempotent — `remove` no-ops on unknown ids). This
        // matches `CodeGraph::remove_file`'s behaviour: finalize()
        // publishes `self.file_segments` verbatim at step 12, so any
        // stale entry leaked here would survive into the assembled
        // `CodeGraph`. Because `FileRegistry::unregister` recycles
        // `FileId` slots, a leaked segment would later alias a
        // different file's node range after the slot is reissued —
        // which would cause `reindex_files` to tombstone the wrong
        // range. Clearing the segment at remove time is the only
        // defence that closes both the "finalize publishes stale
        // segment" path AND the "FileId recycle attaches stale
        // range" path.
        self.file_segments.remove(file_id);

        if tombstoned.is_empty() {
            return tombstoned;
        }

        let dead: std::collections::HashSet<NodeId> = tombstoned.iter().copied().collect();

        // 1. Tombstone each arena slot. `NodeArena::remove` is
        //    idempotent against stale generations, so retriggering a
        //    file removal after finalize is safe.
        for &nid in &tombstoned {
            let _ = self.nodes.remove(nid);
        }

        // 2. Invalidate edges across both CSR + delta in both
        //    directions on the rebuild-local edge store.
        self.edges.tombstone_edges_for_nodes(&dead);

        // 3. Stage the drained NodeIds for the finalize-time K.A/K.B
        //    compaction sweep. finalize() will consume these at steps
        //    2–7 through the NodeIdBearing::retain_nodes predicate.
        self.tombstone_many(tombstoned.iter().copied());

        tombstoned
    }

    /// Returns the rebuild-local epoch captured at `clone_for_rebuild`.
    #[must_use]
    pub fn prior_epoch(&self) -> u64 {
        self.epoch
    }

    /// Pre-finalize tombstone-residue check on the rebuild-state
    /// structures themselves (plan §F.2 literal named API).
    ///
    /// Iterates every `NodeIdBearing` field on this `RebuildGraph` and
    /// panics if any contains a NodeId present in `self.tombstones`.
    /// This is a diagnostic helper for mid-rebuild consistency checks:
    /// e.g., Task 4 Step 4's `RebuildGraph::remove_file` can call this
    /// after tombstoning a file but before more closure files land, to
    /// prove the just-tombstoned NodeIds really left every index before
    /// the next pass commits. The `RebuildGraph::finalize` flow then
    /// re-asserts against the drained set on the *assembled* `CodeGraph`
    /// at step 14 — those are two different snapshots of the same
    /// invariant; both must hold.
    ///
    /// No-op when `self.tombstones` is empty or in release builds.
    ///
    /// Does **not** replace the step-14 call site; the single source of
    /// truth for the publish-boundary residue check remains
    /// `crate::graph::unified::publish::assert_publish_invariants`. This
    /// helper exists so pre-finalize call sites (Task 4 Step 4's
    /// post-remove sanity check, incremental-engine debug probes, Gate
    /// 0d negative tests) can name the assertion without rolling their
    /// own iteration.
    #[cfg(any(debug_assertions, test))]
    pub fn assert_no_tombstone_residue(&self) {
        use super::coverage::NodeIdBearing;
        let dead = &self.tombstones;
        if dead.is_empty() {
            return;
        }
        for nid in self.nodes.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
            );
        }
        for nid in self.edges.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
            );
        }
        for nid in self.indices.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
            );
        }
        for nid in self.macro_metadata.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
            );
        }
        for nid in self.node_provenance.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
            );
        }
        for nid in self.scope_arena.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
            );
        }
        for nid in self.alias_table.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
            );
        }
        for nid in self.shadow_table.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
            );
        }
        for nid in self.files.all_node_ids() {
            assert!(
                !dead.contains(&nid),
                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
            );
        }
    }
}
1074
1075// =====================================================================
1076// Step 1 helper: rewrite every live `NodeEntry`'s interned-string
1077// fields through the dedup-remap table produced by
1078// `StringInterner::build_dedup_table()`.
1079// =====================================================================
1080
1081/// Rewrite every `NodeEntry` field that holds a [`crate::graph::unified::string::id::StringId`]
1082/// through `remap` so no live node points at a collapsed-duplicate
1083/// slot. Called from finalize step 1 after the interner has been
1084/// canonicalised; a no-op when `remap` is empty (the common case of a
1085/// no-op rebuild).
1086///
1087/// Fields rewritten: `name`, `signature`, `doc`, `qualified_name`,
1088/// `visibility`. These are every `StringId` / `Option<StringId>` field
1089/// that `NodeEntry` currently declares. Adding a new `StringId`-valued
1090/// field to `NodeEntry` requires extending this helper. The closest
1091/// automated enforcement is the compile-fail coverage in the
1092/// `sqry_graph_fields!` macro, which forces a new `RebuildGraph` field
1093/// and a matching finalize step for any new `CodeGraph` field; new
1094/// `NodeEntry` fields themselves are caught only at review time, against this helper.
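///
/// A minimal sketch of the intended call shape inside finalize step 1
/// (the `strings` and `nodes` bindings and the `dedup` name are
/// illustrative, not the literal finalize code):
///
/// ```ignore
/// // Canonicalise the interner first, collecting an old-slot → new-slot
/// // mapping for every collapsed duplicate.
/// let dedup = strings.build_dedup_table();
/// // Then point every live NodeEntry at the canonical slots before the
/// // duplicate slots are recycled.
/// rewrite_node_entries_through_remap(&mut nodes, &dedup);
/// ```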
1095fn rewrite_node_entries_through_remap(
1096    nodes: &mut NodeArena,
1097    remap: &HashMap<
1098        crate::graph::unified::string::id::StringId,
1099        crate::graph::unified::string::id::StringId,
1100    >,
1101) {
1102    if remap.is_empty() {
1103        return;
1104    }
1105    for (_id, entry) in nodes.iter_mut() {
1106        if let Some(&canon) = remap.get(&entry.name) {
1107            entry.name = canon;
1108        }
1109        if let Some(sid) = entry.signature
1110            && let Some(&canon) = remap.get(&sid)
1111        {
1112            entry.signature = Some(canon);
1113        }
1114        if let Some(sid) = entry.doc
1115            && let Some(&canon) = remap.get(&sid)
1116        {
1117            entry.doc = Some(canon);
1118        }
1119        if let Some(sid) = entry.qualified_name
1120            && let Some(&canon) = remap.get(&sid)
1121        {
1122            entry.qualified_name = Some(canon);
1123        }
1124        if let Some(sid) = entry.visibility
1125            && let Some(&canon) = remap.get(&sid)
1126        {
1127            entry.visibility = Some(canon);
1128        }
1129    }
1130}
1131
1132// =====================================================================
1133// Assembly path notes (A2 §H "Type-enforced publish path")
1134// =====================================================================
1135//
1136// The only route from `RebuildGraph` to `Arc<CodeGraph>` is:
1137//
1138//     let code_graph = rebuild.finalize()?;            // Step 12 assembles CodeGraph
1139//     let arc = Arc::new(code_graph);                  // caller wraps (publish site)
1140//
1141// The assembly inside `finalize` uses
1142// `CodeGraph::__assemble_from_rebuild_parts_internal`, which is
1143// `pub(crate)` on `CodeGraph`. That means downstream crates — even
1144// `sqry-daemon` with `rebuild-internals` enabled — cannot call the
1145// assembler directly; they must route through `finalize()`, which
1146// guarantees the 14-step compaction sequence runs first.
1147//
1148// The trybuild fixture
1149// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1150// exercises this: a downstream crate attempting to call
1151// `__assemble_from_rebuild_parts_internal` fails with E0603 ("function
1152// is private"), and attempting to `impl From<RebuildGraph> for CodeGraph`
1153// from the downstream fixture crate fails with E0117 (orphan-rule violation).
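//
// A condensed, illustrative view of the two fixture failures (the
// bindings below are placeholders, not the literal fixture source):
//
//     // Downstream crate with `rebuild-internals` enabled:
//     let graph =
//         CodeGraph::__assemble_from_rebuild_parts_internal(/* … */);
//     // error[E0603]: the assembler is `pub(crate)`, invisible downstream.
//
//     impl From<RebuildGraph> for CodeGraph { /* … */ }
//     // error[E0117]: neither the trait nor the two types are local to
//     // the fixture crate, so the orphan rule rejects the impl.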
1154
1155// =====================================================================
1156// Tests
1157// =====================================================================
1158
1159#[cfg(test)]
1160mod tests {
1161    use super::*;
1162    use crate::graph::unified::file::FileId;
1163    use crate::graph::unified::node::NodeKind;
1164    use crate::graph::unified::storage::arena::NodeEntry;
1165    use crate::graph::unified::storage::metadata::MacroNodeMetadata;
1166
1167    /// Seed a `CodeGraph` with a handful of live nodes distributed over
1168    /// two files so the per-field compaction steps have something
1169    /// non-trivial to operate on.
1170    fn seeded_graph() -> (CodeGraph, NodeId, NodeId, NodeId) {
1171        let mut graph = CodeGraph::new();
1172        let file_a = FileId::new(1);
1173        let file_b = FileId::new(2);
1174        let sym = graph.strings_mut().intern("symbol_a").expect("intern a");
1175        let node_a;
1176        let node_b;
1177        let node_c;
1178        {
1179            let arena = graph.nodes_mut();
1180            node_a = arena
1181                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
1182                .expect("alloc a");
1183            node_b = arena
1184                .alloc(NodeEntry::new(NodeKind::Method, sym, file_a))
1185                .expect("alloc b");
1186            node_c = arena
1187                .alloc(NodeEntry::new(NodeKind::Struct, sym, file_b))
1188                .expect("alloc c");
1189        }
1190        graph
1191            .indices_mut()
1192            .add(node_a, NodeKind::Function, sym, Some(sym), file_a);
1193        graph
1194            .indices_mut()
1195            .add(node_b, NodeKind::Method, sym, Some(sym), file_a);
1196        graph
1197            .indices_mut()
1198            .add(node_c, NodeKind::Struct, sym, Some(sym), file_b);
1199        graph
1200            .macro_metadata_mut()
1201            .insert(node_a, MacroNodeMetadata::default());
1202        (graph, node_a, node_b, node_c)
1203    }
1204
1205    #[test]
1206    fn clone_for_rebuild_copies_every_field_without_arc_sharing() {
1207        let (graph, _a, _b, _c) = seeded_graph();
1208        let rebuild = graph.clone_for_rebuild();
1209
1210        // Field-level counts match the source graph.
1211        assert_eq!(rebuild.nodes.len(), graph.nodes().len());
1212        assert_eq!(rebuild.macro_metadata.len(), graph.macro_metadata().len());
1213        // The rebuild owns its own arena (not an Arc share), so
1214        // mutating it in place would not affect the source graph. We
1215        // verify this by tombstoning a node in the rebuild and
1216        // confirming the source is unchanged.
1217        let mut rebuild = rebuild;
1218        let ids: Vec<NodeId> = rebuild.nodes.all_node_ids().collect();
1219        let victim = *ids.first().expect("at least one node");
1220        rebuild.tombstone(victim);
1221        assert_eq!(rebuild.pending_tombstone_count(), 1);
1222        // Source graph unaffected.
1223        assert_eq!(graph.nodes().len(), ids.len());
1224    }
1225
1226    #[test]
1227    fn clone_for_rebuild_preserves_epoch_and_fact_epoch() {
1228        let (mut graph, _, _, _) = seeded_graph();
1229        graph.set_epoch(7);
1230        let rebuild = graph.clone_for_rebuild();
1231        assert_eq!(rebuild.prior_epoch(), 7);
1232        assert_eq!(rebuild.fact_epoch, graph.fact_epoch());
1233    }
1234
1235    // ---- Step-level finalize tests ---------------------------------
1236
1237    #[test]
1238    fn finalize_step11_bumps_epoch_by_one() {
1239        let (mut graph, _, _, _) = seeded_graph();
1240        graph.set_epoch(41);
1241        let rebuild = graph.clone_for_rebuild();
1242        let finalized = rebuild.finalize().expect("finalize ok");
1243        assert_eq!(finalized.epoch(), 42);
1244    }
1245
1246    #[test]
1247    fn finalize_step8_drains_tombstones_into_drained_set() {
1248        // finalize consumes self, so the drained tombstone set cannot be
1249        // inspected directly; instead we verify the drain by checking that
1250        // the finalized graph no longer contains the tombstoned node.
1251        let (graph, a, _b, _c) = seeded_graph();
1252        let mut rebuild = graph.clone_for_rebuild();
1253        rebuild.tombstone(a);
1254        assert_eq!(rebuild.pending_tombstone_count(), 1);
1255        // After finalize, the graph must not contain `a`.
1256        let finalized = rebuild.finalize().expect("finalize ok");
1257        assert!(
1258            finalized.nodes().get(a).is_none(),
1259            "tombstoned node must be gone from arena"
1260        );
1261    }
1262
1263    #[test]
1264    fn finalize_steps_2_and_4_compact_arena_and_indices_consistently() {
1265        let (graph, a, _b, c) = seeded_graph();
1266        let mut rebuild = graph.clone_for_rebuild();
1267        rebuild.tombstone(a);
1268        rebuild.tombstone(c);
1269        let finalized = rebuild.finalize().expect("finalize ok");
1270
1271        // Arena compaction (step 2): dropped nodes are gone.
1272        assert!(finalized.nodes().get(a).is_none());
1273        assert!(finalized.nodes().get(c).is_none());
1274        // Step 4 must compact the auxiliary indices in lockstep, so
1275        // the tombstoned ids do not linger in any index.
1278        assert!(!finalized.indices().by_kind(NodeKind::Function).contains(&a));
1279        assert!(!finalized.indices().by_kind(NodeKind::Struct).contains(&c));
1280    }
1281
1282    #[test]
1283    fn finalize_step5_compacts_macro_metadata() {
1284        let (graph, a, _b, _c) = seeded_graph();
1285        let mut rebuild = graph.clone_for_rebuild();
1286        rebuild.tombstone(a);
1287        let finalized = rebuild.finalize().expect("finalize ok");
1288        // `a` had macro metadata seeded; after finalize it must be gone.
1289        assert!(finalized.macro_metadata().get(a).is_none());
1290    }
1291
1292    #[test]
1293    fn finalize_step9_installs_rebuilt_csr() {
1294        // Step 9 (plan §H lines 684–691) must install a freshly built
1295        // CSR — not merely drop the stale cache. The assembled graph's
1296        // forward and reverse edge stores must both expose a `csr()`
1297        // after finalize, and the deltas must be empty (absorbed by
1298        // the swap).
1299        let (graph, _, _, _) = seeded_graph();
1300        let rebuild = graph.clone_for_rebuild();
1301        let finalized = rebuild.finalize().expect("finalize ok");
1302        assert!(
1303            finalized.edges().forward().csr().is_some(),
1304            "step 9 must install forward CSR"
1305        );
1306        assert!(
1307            finalized.edges().reverse().csr().is_some(),
1308            "step 9 must install reverse CSR"
1309        );
1310        // Deltas were absorbed by the swap.
1311        assert_eq!(finalized.edges().forward().delta_count(), 0);
1312        assert_eq!(finalized.edges().reverse().delta_count(), 0);
1313    }
1314
1315    #[test]
1316    fn finalize_step10_drops_confidence_for_removed_languages() {
1317        // Seed a graph with Rust + Python confidence; the rebuild-local
1318        // FileRegistry has zero files tagged (seeded_graph does not
1319        // register paths), so step 10 must drop both.
1320        let (mut graph, _, _, _) = seeded_graph();
1321        graph.merge_confidence(
1322            "rust",
1323            crate::confidence::ConfidenceMetadata {
1324                level: crate::confidence::ConfidenceLevel::Verified,
1325                ..Default::default()
1326            },
1327        );
1328        graph.merge_confidence(
1329            "python",
1330            crate::confidence::ConfidenceMetadata {
1331                level: crate::confidence::ConfidenceLevel::Partial,
1332                ..Default::default()
1333            },
1334        );
1335        assert_eq!(graph.confidence().len(), 2);
1336
1337        let rebuild = graph.clone_for_rebuild();
1338        let finalized = rebuild.finalize().expect("finalize ok");
1339        assert!(
1340            finalized.confidence().is_empty(),
1341            "step 10 must drop confidence entries for languages that \
1342             have no live files and no recorded limitations"
1343        );
1344    }
1345
1346    #[test]
1347    fn finalize_step10_preserves_confidence_with_limitations() {
1348        // A confidence entry that encodes deliberate limitations must
1349        // survive a zero-file rebuild: dropping it would silently lose
1350        // plugin-recorded analysis state.
1351        let (mut graph, _, _, _) = seeded_graph();
1352        graph.merge_confidence(
1353            "rust",
1354            crate::confidence::ConfidenceMetadata {
1355                level: crate::confidence::ConfidenceLevel::AstOnly,
1356                limitations: vec!["no rust-analyzer".to_string()],
1357                unavailable_features: vec!["type inference".to_string()],
1358            },
1359        );
1360        let rebuild = graph.clone_for_rebuild();
1361        let finalized = rebuild.finalize().expect("finalize ok");
1362        assert_eq!(finalized.confidence().len(), 1);
1363        assert!(finalized.confidence().contains_key("rust"));
1364    }
1365
1366    #[test]
1367    fn finalize_step1_canonicalises_interner_via_dedup() {
1368        // Freeze step must leave the interner's lookup consistent
1369        // (non-stale) and zero duplicate slots after finalize.
1370        let (graph, _, _, _) = seeded_graph();
1371        let rebuild = graph.clone_for_rebuild();
1372        let finalized = rebuild.finalize().expect("finalize ok");
1373        assert!(
1374            !finalized.strings().is_lookup_stale(),
1375            "step 1 freeze must leave lookup_stale == false"
1376        );
1377    }
1378
1379    #[test]
1380    fn finalize_is_infallible_on_empty_tombstone_set() {
1381        let (graph, _, _, _) = seeded_graph();
1382        let rebuild = graph.clone_for_rebuild();
1383        let result = rebuild.finalize();
1384        assert!(result.is_ok(), "empty-tombstone finalize must succeed");
1385        let finalized = result.unwrap();
1386        // Every node survives.
1387        assert_eq!(finalized.nodes().len(), 3);
1388    }
1389
1390    #[test]
1391    fn finalize_survives_interner_snapshot_unchanged_when_no_edits() {
1392        let (graph, _, _, _) = seeded_graph();
1393        let prior_string_count = graph.strings().len();
1394        let rebuild = graph.clone_for_rebuild();
1395        let finalized = rebuild.finalize().expect("finalize ok");
1396        assert_eq!(
1397            finalized.strings().len(),
1398            prior_string_count,
1399            "freeze step must preserve string count across a no-op rebuild"
1400        );
1401    }
1402
1403    #[test]
1404    fn rebuild_graph_pending_tombstone_count_is_accurate() {
1405        let (graph, a, b, c) = seeded_graph();
1406        let mut rebuild = graph.clone_for_rebuild();
1407        assert_eq!(rebuild.pending_tombstone_count(), 0);
1408        rebuild.tombstone(a);
1409        rebuild.tombstone(b);
1410        rebuild.tombstone(c);
1411        // Inserting the same id again is idempotent.
1412        rebuild.tombstone(a);
1413        assert_eq!(rebuild.pending_tombstone_count(), 3);
1414    }
1415
1416    // ----------------------------------------------------------------
1417    // Iter-2 B1: StringId remap across every StringId-backed holder.
1418    // ----------------------------------------------------------------
1419
1420    /// Construct a RebuildGraph whose interner has intentional duplicate
1421    /// `StringId` slots (two slots containing the same canonical
1422    /// "symbol_dup" bytes), with the **live** state referencing both
1423    /// slots. `build_dedup_table()` must collapse them, step 1 must
1424    /// rewrite every StringId-backed surface through the remap, and
1425    /// step 4 must leave no bucket pointing at a recycled StringId.
1426    #[test]
1427    fn step1_remaps_auxiliary_indices_keys_through_dedup_table() {
1428        use crate::graph::unified::storage::interner::StringInterner;
1429
1430        let mut graph = CodeGraph::new();
1431        // Intern "alpha" twice via the bulk path so the two slots are
1432        // NOT coalesced at intern time. We emulate that by manually
1433        // using `alloc_range` + direct bulk_slices_mut assignment on
1434        // the interner.
1435        let interner: &mut StringInterner = graph.strings_mut();
1436        let start = interner.alloc_range(2).expect("alloc range");
1437        {
1438            let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1439            s_slots[0] = Some(std::sync::Arc::from("alpha"));
1440            s_slots[1] = Some(std::sync::Arc::from("alpha"));
1441            rc_slots[0] = 1;
1442            rc_slots[1] = 1;
1443        }
1444        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1445        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1446        let file = FileId::new(1);
1447
1448        // Allocate one node that references the duplicate StringId so
1449        // after dedup the arena still points at a canonical id.
1450        let mut entry = NodeEntry::new(NodeKind::Function, id_dup, file);
1451        entry.qualified_name = Some(id_dup);
1452        let node = graph.nodes_mut().alloc(entry).expect("alloc");
1453        graph
1454            .indices_mut()
1455            .add(node, NodeKind::Function, id_dup, Some(id_dup), file);
1456
1457        let rebuild = graph.clone_for_rebuild();
1458        let finalized = rebuild.finalize().expect("finalize ok");
1459
1460        // After step 1 the duplicate slot must be recycled and every
1461        // live surface must reference the canonical slot.
1462        // (a) Arena field references canonical
1463        let entry = finalized.nodes().get(node).expect("node survives");
1464        assert_eq!(entry.name, id_canon, "NodeEntry.name must be canonicalised");
1465        assert_eq!(
1466            entry.qualified_name,
1467            Some(id_canon),
1468            "NodeEntry.qualified_name must be canonicalised"
1469        );
1470        // (b) AuxiliaryIndices: by_name must find the node under canonical id.
1471        assert!(
1472            finalized.indices().by_name(id_canon).contains(&node),
1473            "indices.by_name under canonical StringId must contain the node"
1474        );
1475        // (c) And must NOT have any bucket under the duplicate StringId.
1476        assert!(
1477            finalized.indices().by_name(id_dup).is_empty(),
1478            "duplicate StringId bucket must be empty after remap"
1479        );
1480        assert!(
1481            finalized.indices().by_qualified_name(id_dup).is_empty(),
1482            "duplicate StringId qname bucket must be empty after remap"
1483        );
1484        // (d) Interner has no stale lookup.
1485        assert!(!finalized.strings().is_lookup_stale());
1486    }
1487
1488    #[test]
1489    fn step1_merges_aux_indices_buckets_when_keys_collapse() {
1490        use crate::graph::unified::storage::interner::StringInterner;
1491
1492        let mut graph = CodeGraph::new();
1493        let interner: &mut StringInterner = graph.strings_mut();
1494        let start = interner.alloc_range(2).expect("alloc range");
1495        {
1496            let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1497            s_slots[0] = Some(std::sync::Arc::from("beta"));
1498            s_slots[1] = Some(std::sync::Arc::from("beta"));
1499            rc_slots[0] = 1;
1500            rc_slots[1] = 1;
1501        }
1502        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1503        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1504        let file = FileId::new(1);
1505
1506        // Two nodes: one references canonical, one references duplicate.
1507        // After remap, both must land in the canonical bucket of
1508        // `name_index` (merged, deduplicated by NodeId).
1509        let node_canon = graph
1510            .nodes_mut()
1511            .alloc(NodeEntry::new(NodeKind::Function, id_canon, file))
1512            .expect("alloc canon");
1513        let node_dup = graph
1514            .nodes_mut()
1515            .alloc(NodeEntry::new(NodeKind::Function, id_dup, file))
1516            .expect("alloc dup");
1517        graph
1518            .indices_mut()
1519            .add(node_canon, NodeKind::Function, id_canon, None, file);
1520        graph
1521            .indices_mut()
1522            .add(node_dup, NodeKind::Function, id_dup, None, file);
1523
1524        let rebuild = graph.clone_for_rebuild();
1525        let finalized = rebuild.finalize().expect("finalize ok");
1526
1527        let canonical_bucket = finalized.indices().by_name(id_canon);
1528        assert!(canonical_bucket.contains(&node_canon));
1529        assert!(canonical_bucket.contains(&node_dup));
1530        // No duplicates within the merged bucket.
1531        let mut seen = std::collections::HashSet::new();
1532        for id in canonical_bucket {
1533            assert!(seen.insert(*id), "bucket must be dedup'd by NodeId");
1534        }
1535        assert!(
1536            finalized.indices().by_name(id_dup).is_empty(),
1537            "collapsed duplicate key bucket must be removed"
1538        );
1539    }
1540
1541    #[test]
1542    fn step1_remaps_file_registry_source_uri() {
1543        use crate::graph::unified::storage::interner::StringInterner;
1544
1545        let mut graph = CodeGraph::new();
1546        // Force two identical "file:///foo" StringId slots.
1547        let interner: &mut StringInterner = graph.strings_mut();
1548        let start = interner.alloc_range(2).expect("alloc range");
1549        {
1550            let (s, rc) = interner.bulk_slices_mut(start, 2);
1551            s[0] = Some(std::sync::Arc::from("file:///foo"));
1552            s[1] = Some(std::sync::Arc::from("file:///foo"));
1553            rc[0] = 1;
1554            rc[1] = 1;
1555        }
1556        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1557        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1558
1559        let fid = graph
1560            .files_mut()
1561            .register_external_with_uri(
1562                "/virtual/Foo.class",
1563                Some(crate::graph::node::Language::Java),
1564                Some(id_dup),
1565            )
1566            .expect("register external");
1567
1568        let rebuild = graph.clone_for_rebuild();
1569        let finalized = rebuild.finalize().expect("finalize ok");
1570
1571        let view = finalized
1572            .files()
1573            .file_provenance(fid)
1574            .expect("provenance present");
1575        assert_eq!(
1576            view.source_uri,
1577            Some(id_canon),
1578            "FileRegistry source_uri must be rewritten to canonical StringId"
1579        );
1580    }
1581
1582    // ----------------------------------------------------------------
1583    // Iter-4 B1: edge-store StringId remap across delta + CSR tiers.
1584    // ----------------------------------------------------------------
1585
1586    /// Construct a `RebuildGraph` whose interner has three slots
1587    /// containing the same canonical bytes ("symbol_edge_dup"), with
1588    /// two distinct `EdgeKind::Imports { alias }` edges pointing at
1589    /// different duplicate slots. Step 1 must rewrite both aliases
1590    /// through the dedup table so the post-step-9 CSR references only
1591    /// the canonical slot; the duplicate slots must be recycled by
1592    /// `recycle_unreferenced` (ref_count == 0) and the canonical slot
1593    /// must retain references.
1594    ///
1595    /// This test intentionally exercises both the delta-tier write path
1596    /// (`DeltaBuffer::iter_mut`) *during* step 1 (where the edges still
1597    /// live in the delta buffer because no compaction has been run)
1598    /// and the CSR-tier read path (where the edges land after step 9's
1599    /// `swap_csrs_and_clear_deltas`). The companion test
1600    /// `step1_remaps_edge_kind_string_ids_in_csr_tier` covers the
1601    /// symmetric case where edges start in the CSR tier at the time
1602    /// step 1 runs.
1603    #[test]
1604    fn step1_remaps_edge_kind_string_ids_through_dedup_table() {
1605        use crate::graph::unified::edge::EdgeKind;
1606        use crate::graph::unified::storage::interner::StringInterner;
1607
1608        let mut graph = CodeGraph::new();
1609        let file_a = FileId::new(1);
1610        // Force three slots carrying the same canonical bytes so
1611        // `build_dedup_table()` collapses slot[start+1] and slot[start+2]
1612        // onto slot[start].
1613        let interner: &mut StringInterner = graph.strings_mut();
1614        let start = interner.alloc_range(3).expect("alloc range");
1615        {
1616            let (s, rc) = interner.bulk_slices_mut(start, 3);
1617            s[0] = Some(std::sync::Arc::from("symbol_edge_dup"));
1618            s[1] = Some(std::sync::Arc::from("symbol_edge_dup"));
1619            s[2] = Some(std::sync::Arc::from("symbol_edge_dup"));
1620            rc[0] = 1;
1621            rc[1] = 1;
1622            rc[2] = 1;
1623        }
1624        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1625        let id_dup_1 = crate::graph::unified::string::id::StringId::new(start + 1);
1626        let id_dup_2 = crate::graph::unified::string::id::StringId::new(start + 2);
1627
1628        // Allocate two nodes so we can hang two edges off distinct
1629        // (src, tgt) pairs and verify both are remapped.
1630        let src;
1631        let tgt;
1632        {
1633            let arena = graph.nodes_mut();
1634            src = arena
1635                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1636                .expect("alloc src");
1637            tgt = arena
1638                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1639                .expect("alloc tgt");
1640        }
1641        graph
1642            .indices_mut()
1643            .add(src, NodeKind::Function, id_canon, None, file_a);
1644        graph
1645            .indices_mut()
1646            .add(tgt, NodeKind::Function, id_canon, None, file_a);
1647
1648        // Edge 1: Imports { alias = id_dup_1 } — forward src→tgt.
1649        graph.edges_mut().add_edge(
1650            src,
1651            tgt,
1652            EdgeKind::Imports {
1653                alias: Some(id_dup_1),
1654                is_wildcard: false,
1655            },
1656            file_a,
1657        );
1658        // Edge 2: Imports { alias = id_dup_2 } — forward tgt→src (so the
1659        // two edges are distinct keys in the store).
1660        graph.edges_mut().add_edge(
1661            tgt,
1662            src,
1663            EdgeKind::Imports {
1664                alias: Some(id_dup_2),
1665                is_wildcard: true,
1666            },
1667            file_a,
1668        );
1669
1670        // Pre-finalize sanity: the edges live in the delta tier.
1671        assert!(
1672            graph.edges().forward().delta_count() >= 2,
1673            "pre-finalize: forward delta must hold both Imports edges"
1674        );
1675        assert!(
1676            graph.edges().reverse().delta_count() >= 2,
1677            "pre-finalize: reverse delta must hold the mirror of both Imports edges"
1678        );
1679
1680        let rebuild = graph.clone_for_rebuild();
1681        let finalized = rebuild.finalize().expect("finalize ok");
1682
1683        // After finalize step 9, edges have been absorbed into the CSR
1684        // tier by `swap_csrs_and_clear_deltas`. Step 1's `EdgeKind`
1685        // rewrite ran on the delta *before* step 9 snapshotted it, so
1686        // the CSR we assert on is built from the already-remapped delta.
1687        //
1688        // Forward CSR: every `Imports` alias must be canonical.
1689        let forward = finalized.edges().forward();
1690        let fwd_csr = forward
1691            .csr()
1692            .expect("forward CSR must be populated after step 9");
1693        let mut fwd_imports_seen = 0usize;
1694        for kind in fwd_csr.edge_kind() {
1695            if let EdgeKind::Imports { alias, .. } = kind {
1696                fwd_imports_seen += 1;
1697                assert_ne!(
1698                    *alias,
1699                    Some(id_dup_1),
1700                    "forward CSR Imports alias must not reference pre-dedup slot id_dup_1"
1701                );
1702                assert_ne!(
1703                    *alias,
1704                    Some(id_dup_2),
1705                    "forward CSR Imports alias must not reference pre-dedup slot id_dup_2"
1706                );
1707                assert_eq!(
1708                    *alias,
1709                    Some(id_canon),
1710                    "forward CSR Imports alias must be canonicalised"
1711                );
1712            }
1713        }
1714        drop(forward);
1715        assert!(
1716            fwd_imports_seen >= 2,
1717            "forward CSR must hold both Imports edges after finalize (saw {fwd_imports_seen})"
1718        );
1719
1720        // Reverse CSR: mirror edges must be canonicalised identically.
1721        let reverse = finalized.edges().reverse();
1722        let rev_csr = reverse
1723            .csr()
1724            .expect("reverse CSR must be populated after step 9");
1725        let mut rev_imports_seen = 0usize;
1726        for kind in rev_csr.edge_kind() {
1727            if let EdgeKind::Imports { alias, .. } = kind {
1728                rev_imports_seen += 1;
1729                assert_ne!(*alias, Some(id_dup_1));
1730                assert_ne!(*alias, Some(id_dup_2));
1731                assert_eq!(*alias, Some(id_canon));
1732            }
1733        }
1734        drop(reverse);
1735        assert!(
1736            rev_imports_seen >= 2,
1737            "reverse CSR must mirror the forward Imports edges (saw {rev_imports_seen})"
1738        );
1739
1740        // Step-1 (c) invariant: after `recycle_unreferenced`, the
1741        // duplicate slots must report `ref_count == 0`. If step 1's
1742        // edge-store remap had skipped `EdgeKind` payloads, the
1743        // duplicates' refcounts would stay positive (held by the edge
1744        // store) and recycle would not reclaim them.
1745        assert_eq!(
1746            finalized.strings().ref_count(id_dup_1),
1747            0,
1748            "duplicate slot id_dup_1 must be recycled (ref_count == 0) by step 1"
1749        );
1750        assert_eq!(
1751            finalized.strings().ref_count(id_dup_2),
1752            0,
1753            "duplicate slot id_dup_2 must be recycled (ref_count == 0) by step 1"
1754        );
1755        assert!(
1756            finalized.strings().ref_count(id_canon) > 0,
1757            "canonical slot must retain references from the two Imports edges (got {})",
1758            finalized.strings().ref_count(id_canon)
1759        );
1760    }
1761
1762    /// Same construction as above, but compact the edges into a CSR tier
1763    /// before calling finalize so the rewrite covers the steady-state
1764    /// `CsrGraph::edge_kind` path. This exercises the branch of
1765    /// `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
1766    /// that is otherwise dead in the tests above.
1767    #[test]
1768    fn step1_remaps_edge_kind_string_ids_in_csr_tier() {
1769        use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
1770        use crate::graph::unified::edge::EdgeKind;
1771        use crate::graph::unified::storage::interner::StringInterner;
1772
1773        let mut graph = CodeGraph::new();
1774        let file_a = FileId::new(1);
1775        let interner: &mut StringInterner = graph.strings_mut();
1776        let start = interner.alloc_range(2).expect("alloc range");
1777        {
1778            let (s, rc) = interner.bulk_slices_mut(start, 2);
1779            s[0] = Some(std::sync::Arc::from("csr_alias_dup"));
1780            s[1] = Some(std::sync::Arc::from("csr_alias_dup"));
1781            rc[0] = 1;
1782            rc[1] = 1;
1783        }
1784        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1785        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1786
1787        let src;
1788        let tgt;
1789        {
1790            let arena = graph.nodes_mut();
1791            src = arena
1792                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1793                .expect("alloc src");
1794            tgt = arena
1795                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1796                .expect("alloc tgt");
1797        }
1798        graph
1799            .indices_mut()
1800            .add(src, NodeKind::Function, id_canon, None, file_a);
1801        graph
1802            .indices_mut()
1803            .add(tgt, NodeKind::Function, id_canon, None, file_a);
1804
1805        // Push the Imports edge into delta with alias pointing at the
1806        // duplicate StringId slot.
1807        graph.edges_mut().add_edge(
1808            src,
1809            tgt,
1810            EdgeKind::Imports {
1811                alias: Some(id_dup),
1812                is_wildcard: false,
1813            },
1814            file_a,
1815        );
1816
1817        // Compact delta → CSR on both directions. This mirrors what the
1818        // full-build pipeline does after Phase 4d plus an explicit
1819        // compaction step.
1820        let node_count = 2;
1821        let edges = graph.edges_mut();
1822        let fwd_snap = snapshot_edges(&edges.forward(), node_count);
1823        let (forward_csr, _) =
1824            build_compacted_csr(&fwd_snap, Direction::Forward).expect("forward csr");
1825        let rev_snap = snapshot_edges(&edges.reverse(), node_count);
1826        let (reverse_csr, _) =
1827            build_compacted_csr(&rev_snap, Direction::Reverse).expect("reverse csr");
1828        edges.swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
1829        assert!(
1830            edges.forward().csr().is_some(),
1831            "forward CSR must be present"
1832        );
1833        assert!(
1834            edges.reverse().csr().is_some(),
1835            "reverse CSR must be present"
1836        );
1837        assert_eq!(edges.forward().delta_count(), 0, "delta must be empty");
1838        assert_eq!(edges.reverse().delta_count(), 0, "delta must be empty");
1839
1840        let rebuild = graph.clone_for_rebuild();
1841        let finalized = rebuild.finalize().expect("finalize ok");
1842
1843        // CSR tier: every `edge_kind` entry must reference the canonical
1844        // StringId, never the duplicate.
1845        let forward = finalized.edges().forward();
1846        let csr = forward
1847            .csr()
1848            .expect("forward CSR must survive finalize step 1");
1849        let mut imports_seen = 0usize;
1850        for kind in csr.edge_kind() {
1851            if let EdgeKind::Imports { alias, .. } = kind {
1852                imports_seen += 1;
1853                assert_ne!(
1854                    *alias,
1855                    Some(id_dup),
1856                    "CSR Imports alias must not reference pre-dedup duplicate"
1857                );
1858                assert_eq!(
1859                    *alias,
1860                    Some(id_canon),
1861                    "CSR Imports alias must be canonicalised"
1862                );
1863            }
1864        }
1865        assert!(
1866            imports_seen > 0,
1867            "forward CSR must hold at least one Imports edge"
1868        );
1869        drop(forward);
1870
1871        // Reverse CSR — same guarantee.
1872        let reverse = finalized.edges().reverse();
1873        let rev_csr = reverse
1874            .csr()
1875            .expect("reverse CSR must survive finalize step 1");
1876        let mut rev_imports_seen = 0usize;
1877        for kind in rev_csr.edge_kind() {
1878            if let EdgeKind::Imports { alias, .. } = kind {
1879                rev_imports_seen += 1;
1880                assert_ne!(*alias, Some(id_dup));
1881                assert_eq!(*alias, Some(id_canon));
1882            }
1883        }
1884        assert!(rev_imports_seen > 0, "reverse CSR must hold Imports edges");
1885        drop(reverse);
1886
1887        assert_eq!(
1888            finalized.strings().ref_count(id_dup),
1889            0,
1890            "duplicate slot must be recycled (ref_count == 0) by step 1"
1891        );
1892    }
1893
1894    /// Exhaustive coverage: every `StringId`-holding surface *reachable
1895    /// through `CodeGraph`'s public mutation API* must be rewritten by
1896    /// finalize step 1. This test drives each surface with a distinct
1897    /// duplicate `StringId` slot and verifies after finalize that (i)
1898    /// every surface references the canonical slot, never the duplicate;
1899    /// (ii) every duplicate slot is recycled (`ref_count == 0`); (iii)
1900    /// the canonical slot retains a positive ref count.
1901    ///
1902    /// Surfaces exercised:
1903    /// * **Arena** (`NodeEntry.name`, `NodeEntry.qualified_name`)
1904    /// * **AuxiliaryIndices** (`name_index` / `qualified_name_index` keys)
1905    /// * **FileRegistry** (`FileEntry.source_uri`)
1906    /// * **BidirectionalEdgeStore** (`EdgeKind::Imports.alias` — delta tier)
1907    ///
1908    /// `AliasTable` and `ShadowTable` StringId fields live under private
1909    /// API (`pub(crate) fn set_alias_table`, no public entry mutators)
1910    /// and are already covered by the iter-2 `step1_remaps_*` tests above
1911    /// that exercise their `rewrite_string_ids_through_remap` helpers
1912    /// end-to-end through the binding-plane derivation path. Covering
1913    /// them here would require either exposing additional test-only APIs
1914    /// or duplicating the iter-2 coverage; neither adds assurance beyond
1915    /// what the dedicated iter-2 tests already provide.
1916    #[test]
1917    fn step1_remaps_all_stringid_holders_exhaustively() {
1918        use crate::graph::unified::edge::EdgeKind;
1919        use crate::graph::unified::storage::interner::StringInterner;
1920
1921        let mut graph = CodeGraph::new();
1922        let file_a = FileId::new(1);
1923        let interner: &mut StringInterner = graph.strings_mut();
1924        // 10 slots = 5 surfaces × 2 (canon + dup) interleaved.
1925        let start = interner.alloc_range(10).expect("alloc range");
1926        {
1927            let (s, rc) = interner.bulk_slices_mut(start, 10);
1928            for (i, label) in [
1929                "arena_name",
1930                "arena_name",
1931                "arena_qname",
1932                "arena_qname",
1933                "idx_key",
1934                "idx_key",
1935                "edge_alias",
1936                "edge_alias",
1937                "file_uri",
1938                "file_uri",
1939            ]
1940            .iter()
1941            .enumerate()
1942            {
1943                s[i] = Some(std::sync::Arc::from(*label));
1944                rc[i] = 1;
1945            }
1946        }
1947        let sid = |off: u32| crate::graph::unified::string::id::StringId::new(start + off);
1948        let arena_name_canon = sid(0);
1949        let arena_name_dup = sid(1);
1950        let arena_qname_canon = sid(2);
1951        let arena_qname_dup = sid(3);
1952        let idx_key_canon = sid(4);
1953        let idx_key_dup = sid(5);
1954        let edge_alias_canon = sid(6);
1955        let edge_alias_dup = sid(7);
1956        let file_uri_canon = sid(8);
1957        let file_uri_dup = sid(9);
1958
1959        // (a) Arena — name + qualified_name reference duplicate slots.
1960        let node;
1961        let node2;
1962        {
1963            let arena = graph.nodes_mut();
1964            let mut entry = NodeEntry::new(NodeKind::Function, arena_name_dup, file_a);
1965            entry.qualified_name = Some(arena_qname_dup);
1966            node = arena.alloc(entry).expect("alloc arena node");
1967            node2 = arena
1968                .alloc(NodeEntry::new(NodeKind::Function, arena_name_dup, file_a))
1969                .expect("alloc arena node2");
1970        }
1971        // (b) Indices — key under the duplicate StringId.
1972        graph
1973            .indices_mut()
1974            .add(node, NodeKind::Function, idx_key_dup, None, file_a);
1975        graph
1976            .indices_mut()
1977            .add(node2, NodeKind::Function, idx_key_dup, None, file_a);
1978        // (c) File registry — register a file with source_uri = dup.
1979        let fid = graph
1980            .files_mut()
1981            .register_external_with_uri(
1982                "/virtual/Exhaustive.class",
1983                Some(crate::graph::node::Language::Java),
1984                Some(file_uri_dup),
1985            )
1986            .expect("register external");
1987        // (d) Edge store — Imports edge with alias = dup.
1988        graph.edges_mut().add_edge(
1989            node,
1990            node2,
1991            EdgeKind::Imports {
1992                alias: Some(edge_alias_dup),
1993                is_wildcard: false,
1994            },
1995            file_a,
1996        );
1997
1998        let rebuild = graph.clone_for_rebuild();
1999        let finalized = rebuild.finalize().expect("finalize ok");
2000
2001        // (a) Arena: name + qualified_name on every live node.
2002        for nid in [node, node2] {
2003            let entry = finalized.nodes().get(nid).expect("node survives");
2004            assert_eq!(entry.name, arena_name_canon, "arena name canonicalised");
2005            if let Some(q) = entry.qualified_name {
2006                assert_eq!(q, arena_qname_canon, "arena qname canonicalised");
2007            }
2008        }
2009        // (b) Indices: canonical bucket populated, duplicate bucket empty.
2010        assert!(finalized.indices().by_name(idx_key_dup).is_empty());
2011        assert!(!finalized.indices().by_name(idx_key_canon).is_empty());
2012        // (c) File registry.
2013        let view = finalized
2014            .files()
2015            .file_provenance(fid)
2016            .expect("provenance present");
2017        assert_eq!(view.source_uri, Some(file_uri_canon));
2018        // (d) Edge store: every Imports alias canonical on both
2019        // directions. Post-finalize, edges live in CSR (step 9 drained
2020        // the delta into the CSR on both directions). Step 1's remap
2021        // ran on the delta *before* step 9, so the CSR is built from
2022        // the already-remapped delta.
2023        let fwd = finalized.edges().forward();
2024        let fwd_csr = fwd
2025            .csr()
2026            .expect("forward CSR must be populated after step 9");
2027        let mut fwd_imports = 0usize;
2028        for kind in fwd_csr.edge_kind() {
2029            if let EdgeKind::Imports { alias, .. } = kind {
2030                fwd_imports += 1;
2031                assert_ne!(*alias, Some(edge_alias_dup));
2032                assert_eq!(*alias, Some(edge_alias_canon));
2033            }
2034        }
2035        drop(fwd);
2036        assert!(fwd_imports > 0, "forward Imports must be present in CSR");
2037        let rev = finalized.edges().reverse();
2038        let rev_csr = rev
2039            .csr()
2040            .expect("reverse CSR must be populated after step 9");
2041        let mut rev_imports = 0usize;
2042        for kind in rev_csr.edge_kind() {
2043            if let EdgeKind::Imports { alias, .. } = kind {
2044                rev_imports += 1;
2045                assert_ne!(*alias, Some(edge_alias_dup));
2046                assert_eq!(*alias, Some(edge_alias_canon));
2047            }
2048        }
2049        drop(rev);
2050        assert!(rev_imports > 0, "reverse Imports must be present in CSR");
2051
2052        // All duplicate slots recycled; canonical slots retained.
2053        for (dup, canon, label) in [
2054            (arena_name_dup, arena_name_canon, "arena_name"),
2055            (arena_qname_dup, arena_qname_canon, "arena_qname"),
2056            (idx_key_dup, idx_key_canon, "idx_key"),
2057            (edge_alias_dup, edge_alias_canon, "edge_alias"),
2058            (file_uri_dup, file_uri_canon, "file_uri"),
2059        ] {
2060            assert_eq!(
2061                finalized.strings().ref_count(dup),
2062                0,
2063                "{label}: duplicate slot must be recycled (ref_count == 0)"
2064            );
2065            assert!(
2066                finalized.strings().ref_count(canon) > 0,
2067                "{label}: canonical slot must retain references (got {})",
2068                finalized.strings().ref_count(canon)
2069            );
2070        }
2071    }
2072
2073    // ----------------------------------------------------------------
2074    // Iter-2 B2: step 6 + step 13 per_file_nodes / bucket bijection.
2075    // ----------------------------------------------------------------
2076
2077    #[test]
2078    fn finalize_step6_drops_tombstoned_nodes_from_buckets() {
2079        let (graph, a, b, c) = seeded_graph();
2080        // Seed the registry buckets so finalize step 6 has real work.
2081        let file_a = FileId::new(1);
2082        let file_b = FileId::new(2);
2083        // `seeded_graph` does not record any per-file bucket entries, so
2084        // populate them on the graph before cloning for rebuild.
2085        let mut graph = graph;
2093        graph.files_mut().record_node(file_a, a);
2094        graph.files_mut().record_node(file_a, b);
2095        graph.files_mut().record_node(file_b, c);
2096
2097        let mut rebuild = graph.clone_for_rebuild();
2098        rebuild.tombstone(a);
2099        rebuild.tombstone(c);
2100        let finalized = rebuild.finalize().expect("finalize ok");
2101
2102        // a and c were tombstoned — must be absent from any bucket.
2103        // b survives under file_a; file_b's bucket collapsed to empty
2104        // and must be dropped.
2105        let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2106            finalized.files().per_file_nodes_for_gate0d().collect();
2107        assert_eq!(buckets.len(), 1, "empty bucket must be dropped");
2108        assert_eq!(buckets.get(&file_a).cloned().unwrap_or_default(), vec![b]);
2109        assert!(!buckets.contains_key(&file_b));
2110    }
2111
2112    #[test]
2113    fn finalize_step6_dedups_within_bucket() {
2114        let (mut graph, a, b, c) = seeded_graph();
2115        let file_a = FileId::new(1);
2116        let file_b = FileId::new(2);
2117        // Bucket every live node consistently with seeded_graph's own
2118        // FileId assignment (so the non-vacuous bijection check passes),
2119        // but duplicate `a` once to exercise step 6's dedup path.
2120        graph.files_mut().record_node(file_a, a);
2121        graph.files_mut().record_node(file_a, a); // intentional duplicate
2122        graph.files_mut().record_node(file_a, b);
2123        graph.files_mut().record_node(file_b, c);
2124        let rebuild = graph.clone_for_rebuild();
2125        let finalized = rebuild.finalize().expect("finalize ok");
2126        let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2127            finalized.files().per_file_nodes_for_gate0d().collect();
2128        let bucket_a = buckets.get(&file_a).expect("bucket for file_a");
2129        // Two live nodes (a, b) in file_a, duplicate `a` collapsed.
2130        assert_eq!(
2131            bucket_a.len(),
2132            2,
2133            "duplicates within bucket must be dedup'd"
2134        );
2135        assert!(bucket_a.contains(&a));
2136        assert!(bucket_a.contains(&b));
2137    }
2138
2139    #[test]
2140    fn finalize_step6_drops_empty_buckets() {
2141        let (mut graph, a, _b, _c) = seeded_graph();
2142        let file = FileId::new(1);
2143        graph.files_mut().record_node(file, a);
2144        let mut rebuild = graph.clone_for_rebuild();
2145        rebuild.tombstone(a); // drops the only bucket member
2146        let finalized = rebuild.finalize().expect("finalize ok");
2147        assert_eq!(
2148            finalized.files().per_file_bucket_count(),
2149            0,
2150            "empty bucket must be dropped by step 6"
2151        );
2152    }
2153
2154    #[test]
2155    fn bucket_bijection_passes_when_every_live_node_is_bucketed() {
2156        let (mut graph, a, b, c) = seeded_graph();
2157        // Bucket every live node consistently with each node's FileId.
2158        // seeded_graph uses file_a=1 for a, b and file_b=2 for c.
2159        let file_a = FileId::new(1);
2160        let file_b = FileId::new(2);
2161        graph.files_mut().record_node(file_a, a);
2162        graph.files_mut().record_node(file_a, b);
2163        graph.files_mut().record_node(file_b, c);
2164
2165        // Round-trip through finalize so the bijection check runs.
2166        let rebuild = graph.clone_for_rebuild();
2167        let finalized = rebuild.finalize().expect("finalize ok");
2168        // Explicit assert_bucket_bijection call also passes.
2169        finalized.assert_bucket_bijection();
2170    }
2171
2172    #[test]
2173    #[should_panic(expected = "duplicate node")]
2174    fn bucket_bijection_detects_duplicate_within_bucket() {
2175        let (mut graph, a, _b, _c) = seeded_graph();
2176        let file_a = FileId::new(1);
2177        // Manually force a duplicate inside a bucket by bypassing
2178        // `retain_nodes_in_buckets`. We achieve this by calling
2179        // `record_node` twice on the live graph and then invoking the
2180        // bijection check directly *without* routing through finalize
2181        // (which would dedup in step 6).
2182        graph.files_mut().record_node(file_a, a);
2183        graph.files_mut().record_node(file_a, a);
2184        graph.assert_bucket_bijection();
2185    }
2186
2187    #[test]
2188    #[should_panic(expected = "misfiled")]
2189    fn bucket_bijection_detects_misfiled_node() {
2190        let (mut graph, a, _b, _c) = seeded_graph();
2191        // Node `a` was allocated against FileId(1); put it in a bucket
2192        // keyed by FileId(99) — the bijection must reject with the
2193        // precise "misfiled" panic message emitted by
2194        // `CodeGraph::assert_bucket_bijection` (concurrent/graph.rs
2195        // around line 865). A broader `expected = "node"` would also
2196        // match the duplicate-node / dead-node / multi-bucket arms, so
2197        // it would not discriminate the specific failure mode under test.
2198        graph.files_mut().record_node(FileId::new(99), a);
2199        graph.assert_bucket_bijection();
2200    }
2201
2202    #[test]
2203    #[should_panic(expected = "absent from all buckets")]
2204    fn bucket_bijection_detects_missing_live_node() {
2205        let (mut graph, a, _b, c) = seeded_graph();
2206        // Bucket one node but not the other; with at least one populated
2207        // bucket, condition (d) becomes strict and the missing live node
2208        // must trigger a panic.
2209        let file_a = FileId::new(1);
2210        let _ = c;
2211        graph.files_mut().record_node(file_a, a);
2212        graph.assert_bucket_bijection();
2213    }
2214
2215    #[test]
2216    #[should_panic(expected = "dead node")]
2217    fn bucket_bijection_detects_dead_node_in_bucket() {
2218        let (mut graph, a, _b, _c) = seeded_graph();
2219        // Remove `a` from the arena but leave it in its bucket; the
2220        // bijection must reject the dead node.
2220        let file_a = FileId::new(1);
2221        graph.files_mut().record_node(file_a, a);
2222        graph.nodes_mut().remove(a);
2223        graph.assert_bucket_bijection();
2224    }
2225
2226    // -----------------------------------------------------------------
2227    // Gate 0d — Tombstone-residue negative tests.
2228    //
2229    // Covers both the `RebuildGraph::assert_no_tombstone_residue`
2230    // diagnostic helper and the finalize step-14 publish-boundary
2231    // check. The bijection counterparts live in the block immediately
2232    // above.
2233    // -----------------------------------------------------------------
2234
2235    #[test]
2236    #[should_panic(expected = "still in edge store")]
2237    fn rebuild_graph_residue_detects_tombstone_still_in_edge_store() {
2238        // Gate 0d iter-1 Minor — dedicated negative test for the
2239        // edge-store residue arm.
2240        //
2241        // The residue check iterates K-rows in order (see
2242        // `assert_no_tombstone_residue` above): NodeArena → edges →
2243        // AuxiliaryIndices → macro_metadata → ... — so to exercise the
2244        // `edges` arm specifically we must remove `a` from the
2245        // NodeArena first (so the NodeArena arm does not fire), leave
2246        // the edge that references `a` intact, and tombstone it.
2247        use crate::graph::unified::edge::kind::EdgeKind;
2248        let (graph, a, b, _c) = seeded_graph();
2249        let mut rebuild = graph.clone_for_rebuild();
        // Seed the edge store with an edge that references `a`. This
        // becomes the "dangling reference" the residue check must
        // detect: `a` will be removed from the arena but the edge
        // keeps pointing at it.
        rebuild.edges.add_edge(
            a,
            b,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            FileId::new(1),
        );
        // Remove `a` from the arena so the K.A1 arm passes.
        rebuild.nodes.remove(a);
        rebuild.tombstone(a);
        rebuild.assert_no_tombstone_residue();
    }

    #[test]
    #[should_panic(expected = "still in auxiliary indices")]
    fn rebuild_graph_residue_detects_tombstone_still_in_auxiliary_indices() {
        // The residue check iterates K-rows starting at `NodeArena`, so
        // to exercise the `AuxiliaryIndices` arm specifically we must
        // first remove `a` from the arena (simulating step 2 of
        // finalize), leaving the `AuxiliaryIndices` stale reference
        // as the first arm that can trip. This reproduces the exact
        // failure mode the pre-finalize helper exists to catch: a bug
        // where finalize compacts the arena but forgets one index.
        let (graph, a, _b, _c) = seeded_graph();
        let mut rebuild = graph.clone_for_rebuild();
        rebuild.nodes.remove(a);
        rebuild.tombstone(a);
        rebuild.assert_no_tombstone_residue();
    }

    #[test]
    #[should_panic(expected = "still in NodeArena")]
    fn rebuild_graph_residue_detects_tombstone_still_in_node_arena() {
        // NodeArena is the first `NodeIdBearing` in the residue check's
        // iteration order, so we can trip it with a raw `tombstone(a)`
        // call before we touch any auxiliary index. The live arena
        // entry for `a` remains — that is the bug we want to surface.
        let (graph, a, _b, _c) = seeded_graph();
        let mut rebuild = graph.clone_for_rebuild();
        // Prove the arena still contains `a` before the assertion so
        // the panic expectation maps to a real condition, not a vacuous
        // empty-tombstone skip.
        assert!(
            rebuild.nodes.get(a).is_some(),
            "pre-condition: arena must still contain the staged tombstone"
        );
        rebuild.tombstone(a);
        rebuild.assert_no_tombstone_residue();
    }

    #[test]
    fn rebuild_graph_residue_is_noop_on_empty_tombstone_set() {
        // Positive: with no tombstones staged, the assertion must be a
        // no-op — otherwise every fresh `clone_for_rebuild` would panic.
        let (graph, _a, _b, _c) = seeded_graph();
        let rebuild = graph.clone_for_rebuild();
        assert_eq!(rebuild.pending_tombstone_count(), 0);
        rebuild.assert_no_tombstone_residue();
    }

    #[test]
    #[should_panic(expected = "still in NodeArena")]
    fn finalize_step14_residue_detects_live_reference_to_drained_node() {
        // Drive the `finalize` step-14 residue assertion through the
        // publish-boundary helper. `seeded_graph()` returns a graph in
        // which `a` is a live NodeArena entry; passing `a` in the
        // `drained` set without actually removing the arena slot
        // simulates the bug step 14 exists to catch — step 8 drained
        // `a` into `drained_tombstones`, but something failed to
        // compact the arena. The residue check's K-row iteration
        // starts at `NodeArena`, so that is the arm that fires.
        //
        // The test routes through the canonical
        // `publish::assert_publish_invariants` helper so the test
        // exercises the exact code path `finalize` step 14 executes,
        // not just the underlying `assert_no_tombstone_residue_for`
        // entry point.
        let (graph, a, _b, _c) = seeded_graph();
        let mut drained: ::std::collections::HashSet<NodeId> = ::std::collections::HashSet::new();
        drained.insert(a);
        crate::graph::unified::publish::assert_publish_invariants(&graph, &drained);
    }

    #[test]
    fn finalize_with_empty_drained_set_passes_publish_invariants() {
        // Positive smoke test: the happy path through finalize (no
        // tombstones staged) must pass `assert_publish_invariants`
        // unconditionally on every build profile. This is the case the
        // full-rebuild `build_unified_graph_inner` end-of-function call
        // exercises on every CI run.
        let (graph, _a, _b, _c) = seeded_graph();
        let file_a = FileId::new(1);
        let file_b = FileId::new(2);
        let mut graph = graph;
        graph.files_mut().record_node(file_a, _a);
        graph.files_mut().record_node(file_a, _b);
        graph.files_mut().record_node(file_b, _c);

        let rebuild = graph.clone_for_rebuild();
        let finalized = rebuild.finalize().expect("finalize ok");
        crate::graph::unified::publish::assert_publish_invariants(
            &finalized,
            &::std::collections::HashSet::new(),
        );
    }

    // ---- Task 4 Step 3 — RebuildGraph::remove_file ------------------
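    // As exercised by the tests below, `RebuildGraph::remove_file`
    // drains the per-file node bucket and returns the drained NodeIds,
    // removes those nodes from the rebuild arena, invalidates edges
    // that source or target them, drops the FileRegistry and
    // `file_segments` entries, and stages the ids as pending
    // tombstones; the full NodeIdBearing K.A/K.B sweep over the
    // remaining surfaces is deferred to `finalize()`.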

    /// Seed a graph + rebuild with 2 files × `per_file` nodes and a
    /// mix of intra- and cross-file `Calls` edges, then clone for
    /// rebuild. Returns `(rebuild, file_a, file_b, file_a_nodes,
    /// file_b_nodes)` — mirrors `seed_two_file_graph` in the
    /// `concurrent::graph::tests` module but yields the rebuild value
    /// so tests here can drive `RebuildGraph::remove_file` directly.
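    ///
    /// With `per_file = 3` the seeded edges are, schematically:
    /// `a0 -> a1 -> a2` (intra-A chain), `b0 -> b1 -> b2` (intra-B
    /// chain), plus the cross-file pair `a0 -> b0` (recorded under
    /// file_a) and `b0 -> a0` (recorded under file_b).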
    fn seed_two_file_rebuild(
        per_file: usize,
    ) -> (
        RebuildGraph,
        crate::graph::unified::file::FileId,
        crate::graph::unified::file::FileId,
        Vec<NodeId>,
        Vec<NodeId>,
    ) {
        use crate::graph::unified::edge::EdgeKind;
        use crate::graph::unified::node::NodeKind;
        use crate::graph::unified::storage::arena::NodeEntry;
        use std::path::Path;

        let mut graph = CodeGraph::new();
        let sym = graph.strings_mut().intern("sym").expect("intern");
        let file_a = graph
            .files_mut()
            .register(Path::new("/tmp/rebuild_remove_file_test/a.rs"))
            .expect("register a");
        let file_b = graph
            .files_mut()
            .register(Path::new("/tmp/rebuild_remove_file_test/b.rs"))
            .expect("register b");

        let mut file_a_nodes = Vec::with_capacity(per_file);
        let mut file_b_nodes = Vec::with_capacity(per_file);
        for _ in 0..per_file {
            let n = graph
                .nodes_mut()
                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
                .expect("alloc a");
            file_a_nodes.push(n);
            graph.files_mut().record_node(file_a, n);
            graph
                .indices_mut()
                .add(n, NodeKind::Function, sym, None, file_a);
        }
        for _ in 0..per_file {
            let n = graph
                .nodes_mut()
                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
                .expect("alloc b");
            file_b_nodes.push(n);
            graph.files_mut().record_node(file_b, n);
            graph
                .indices_mut()
                .add(n, NodeKind::Function, sym, None, file_b);
        }
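        // Chain intra-file `Calls` edges n[i] -> n[i + 1] within each
        // file, so removing one file leaves the other file's chain
        // untouched.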
        for i in 0..per_file.saturating_sub(1) {
            graph.edges_mut().add_edge(
                file_a_nodes[i],
                file_a_nodes[i + 1],
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                },
                file_a,
            );
            graph.edges_mut().add_edge(
                file_b_nodes[i],
                file_b_nodes[i + 1],
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                },
                file_b,
            );
        }
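        // Cross-file pair: a0 -> b0 is recorded under file_a and
        // b0 -> a0 under file_b, so removing either file must drop
        // edges in both directions.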
        graph.edges_mut().add_edge(
            file_a_nodes[0],
            file_b_nodes[0],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_a,
        );
        graph.edges_mut().add_edge(
            file_b_nodes[0],
            file_a_nodes[0],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_b,
        );

        let rebuild = graph.clone_for_rebuild();
        (rebuild, file_a, file_b, file_a_nodes, file_b_nodes)
    }

    #[test]
    fn rebuild_remove_file_tombstones_all_per_file_nodes() {
        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);

        let returned = rebuild.remove_file(file_a);

        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
        let expected_set: std::collections::HashSet<NodeId> =
            file_a_nodes.iter().copied().collect();
        assert_eq!(
            returned_set, expected_set,
            "remove_file must return exactly the file_a nodes drained from the bucket"
        );
        // Each returned NodeId is arena-gone on the rebuild.
        for nid in &file_a_nodes {
            assert!(
                rebuild.nodes.get(*nid).is_none(),
                "node {nid:?} from removed file must be tombstoned on rebuild arena"
            );
        }
        // And staged for the finalize-time NodeIdBearing sweep.
        assert_eq!(rebuild.pending_tombstone_count(), file_a_nodes.len());
    }

    #[test]
    fn rebuild_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
        let (mut rebuild, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(3);

        // Seed produces 6 forward delta edges (2 intra-A + 2 intra-B
        // + 2 cross). The rebuild's own forward/reverse edge stores
        // mirror this.
        assert_eq!(rebuild.edges.forward().delta().len(), 6);

        let _ = rebuild.remove_file(file_a);

        // After removal: only 2 intra-B edges remain in each direction.
        assert_eq!(rebuild.edges.forward().delta().len(), 2);
        assert_eq!(rebuild.edges.reverse().delta().len(), 2);

        // Cross-file edge b[0] -> a[0] must be gone from any direction.
        let b0 = file_b_nodes[0];
        let a0 = file_a_nodes[0];
        let from_b0: Vec<_> = rebuild
            .edges
            .edges_from(b0)
            .into_iter()
            .filter(|e| e.target == a0)
            .collect();
        assert!(
            from_b0.is_empty(),
            "edge b0 -> a0 must be gone after rebuild.remove_file(file_a)"
        );
    }

    #[test]
    fn rebuild_remove_file_drops_file_registry_entry() {
        let (mut rebuild, file_a, _file_b, _, _) = seed_two_file_rebuild(2);

        assert!(rebuild.files.resolve(file_a).is_some());
        assert!(!rebuild.files.nodes_for_file(file_a).is_empty());

        let _ = rebuild.remove_file(file_a);

        assert!(
            rebuild.files.resolve(file_a).is_none(),
            "rebuild FileRegistry entry must be gone"
        );
        assert!(
            rebuild.files.nodes_for_file(file_a).is_empty(),
            "rebuild per-file bucket for file_a must be drained"
        );
    }

    #[test]
    fn rebuild_remove_file_is_idempotent_on_unknown_file() {
        use crate::graph::unified::file::FileId;
        let (mut rebuild, _file_a, _file_b, _, _) = seed_two_file_rebuild(2);

        let nodes_before = rebuild.nodes.len();
        let delta_fwd_before = rebuild.edges.forward().delta().len();
        let delta_rev_before = rebuild.edges.reverse().delta().len();
        let tombstones_before = rebuild.pending_tombstone_count();

        let bogus = FileId::new(9999);
        let returned = rebuild.remove_file(bogus);
        assert!(returned.is_empty());

        assert_eq!(rebuild.nodes.len(), nodes_before);
        assert_eq!(rebuild.edges.forward().delta().len(), delta_fwd_before);
        assert_eq!(rebuild.edges.reverse().delta().len(), delta_rev_before);
        assert_eq!(rebuild.pending_tombstone_count(), tombstones_before);
    }

    #[test]
    fn rebuild_remove_file_stages_tombstones_for_finalize_sweep() {
        // The whole point of RebuildGraph::remove_file deferring the
        // K.A/K.B sweep to finalize() is that the sweep happens exactly
        // once against the union of every file's tombstones. Drive this
        // end-to-end: remove both files, finalize, and assert the
        // publish-boundary invariants hold.
        let (mut rebuild, file_a, file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(2);

        let _ = rebuild.remove_file(file_a);
        let _ = rebuild.remove_file(file_b);

        // Pending tombstones: union of both files' nodes.
        assert_eq!(
            rebuild.pending_tombstone_count(),
            file_a_nodes.len() + file_b_nodes.len()
        );

        // Deliberately skip the pre-finalize residue check here. Only
        // the NodeArena and edge stores are tombstoned eagerly by
        // remove_file; the remaining NodeIdBearing K.A/K.B surfaces
        // (auxiliary indices, metadata stores) are untouched until
        // finalize runs, so `assert_no_tombstone_residue`, which walks
        // every surface, would legitimately trip on them.
        //
        // The real post-condition is downstream: finalize must succeed
        // and the assembled CodeGraph must pass the publish-boundary
        // invariants against the drained set.
        let finalized = rebuild.finalize().expect("finalize must succeed");

        // Every surface on the finalized CodeGraph must be clean of the
        // tombstoned ids. finalize's own step-14 call already checked
        // residue against the drained set, so here we only re-verify
        // the publish-boundary bijection for paranoia.
        crate::graph::unified::publish::assert_publish_bijection(&finalized);
        // Arena is empty (every seeded node was tombstoned).
        assert_eq!(finalized.nodes().len(), 0);
        // No file registration survives.
        assert!(finalized.files().resolve(file_a).is_none());
        assert!(finalized.files().resolve(file_b).is_none());
    }

    #[test]
    fn rebuild_remove_file_repeated_calls_are_idempotent() {
        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);

        let first = rebuild.remove_file(file_a);
        assert_eq!(first.len(), file_a_nodes.len());
        let staged = rebuild.pending_tombstone_count();

        // Second call: bucket is already drained, NodeArena::remove
        // ignores stale generations, and the tombstones set should not
        // grow. The immediate tombstone side effects are idempotent.
        let second = rebuild.remove_file(file_a);
        assert!(second.is_empty());
        assert_eq!(rebuild.pending_tombstone_count(), staged);
    }

    #[test]
    fn rebuild_remove_file_clears_file_segments_entry() {
        // Mirror of the CodeGraph-side test in `concurrent/graph.rs`,
        // added as the iter-1 Codex review fix. Seed a segment for
        // file A, invoke `RebuildGraph::remove_file`, and assert both
        // the rebuild-local `file_segments` and the finalized
        // `CodeGraph`'s `file_segments` table carry no entry for
        // file A. The second half of the check is critical:
        // `finalize()` step 12 publishes `self.file_segments`
        // verbatim, so a missing clear here would leak the stale
        // range into the publishable graph.
        use std::path::Path;

        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);

        // Seed a segment for file A. Fish out the span from the
        // allocated NodeIds to mirror the shape `phase3_parallel_commit`
        // produces.
        let first_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .min()
            .expect("per_file = 3");
        let last_index = file_a_nodes
            .iter()
            .map(|n| n.index())
            .max()
            .expect("per_file = 3");
        let slot_count = last_index - first_index + 1;
        rebuild
            .file_segments
            .record_range(file_a, first_index, slot_count);
        assert!(
            rebuild.file_segments().get(file_a).is_some(),
            "seeded segment for file_a must be present before remove_file"
        );

        // Act.
        let _ = rebuild.remove_file(file_a);

        // Rebuild-local post-condition.
        assert!(
            rebuild.file_segments().get(file_a).is_none(),
            "RebuildGraph::remove_file must clear the file_segments entry"
        );

        // Finalize post-condition: the published CodeGraph must also
        // carry no entry for file_a.
        let finalized = rebuild.finalize().expect("finalize must succeed");
        assert!(
            finalized.file_segments().get(file_a).is_none(),
            "finalize must publish a CodeGraph with no stale file_segments for file_a"
        );

        // Lint suppression: the `use std::path::Path;` at the top of
        // this test is not otherwise referenced in the body. The no-op
        // binding below silences the unused-import warning while
        // keeping the import around, so the test stays self-contained
        // if the segment seeding above later needs to build paths.
        let _ = Path::new("");
    }
}