Skip to main content

sqry_core/graph/unified/rebuild/
rebuild_graph.rs

1//! [A2 §H] `RebuildGraph` + `clone_for_rebuild` + `finalize()` — Gate 0c.
2//!
3//! **This module is feature-gated.** Every item lives behind
4//! `#[cfg(feature = "rebuild-internals")]`; only `sqry-daemon` is
5//! whitelisted to enable the feature (see
6//! `sqry-core/tests/rebuild_internals_whitelist.rs` and the plan at
7//! `docs/superpowers/plans/2026-03-19-sqryd-daemon.md` §H "Placement
8//! and feature gate").
9//!
10//! # What this module delivers
11//!
12//! Per the plan's A2 §H (pre-implementation Gate 0c, required before
13//! any `clone_for_rebuild` caller in Task 4 Step 4 can exist):
14//!
15//! 1. [`sqry_graph_fields!`] — a **single source of truth** macro that
16//!    enumerates every field on [`super::super::concurrent::CodeGraph`].
17//!    It is invoked in three places by the rest of this module:
18//!      - `sqry_graph_fields!(@decl_rebuild)` declares the
19//!        [`RebuildGraph`] struct.
20//!      - `sqry_graph_fields!(@clone_inner from self)` is expanded
21//!        inside [`CodeGraph::clone_for_rebuild`]; the destructure
22//!        `let CodeGraph { .. } = self;` is exhaustive, so adding a
23//!        field to [`CodeGraph`] without adding it to the macro's
24//!        field list is a hard `E0027` compile error.
25//!      - `sqry_graph_fields!(@field_names)` emits a `&[&str]` of
26//!        every field name, used by the Gate 0d bijection check
27//!        (future) and by diagnostics.
28//! 2. [`CodeGraph::clone_for_rebuild`] — deep-copies every Arc-wrapped
29//!    field into an owned, rebuild-local [`RebuildGraph`]. The rebuild
30//!    dispatcher (Task 4 Step 4) is the only caller.
31//! 3. [`RebuildGraph::finalize`] — the canonical 14-step sequence
32//!    spelled out in §H lines 658–707. Every step is either an API
33//!    call on an owned component (freeze interner, compact arena,
34//!    compact edges, compact indices, compact metadata, compact file
35//!    buckets, compact K-list extras), a drain/move of scratch state
36//!    (drain tombstones), a derived-state reset (rebuild CSR), a
37//!    per-language metadata update (confidence), a scalar bump (epoch),
38//!    a struct assembly (`Arc::new` wrapping), or a debug invariant
39//!    (bucket bijection + tombstone residue).
40//!
41//! # Why the macro is kind-tagged, not a single callback
42//!
43//! The plan's literal example invokes `sqry_graph_fields!(CodeGraph,
44//! RebuildGraph)` to generate both structs from one sibling macro
45//! call. That shape would require `CodeGraph` to be declared by the
46//! macro — but `CodeGraph` has dozens of `impl` blocks, Clone
47//! semantics, and accessor methods that already destructure the fields
48//! by name on stable Rust. Moving the declaration into a macro
49//! expansion would obscure every one of those sites.
50//!
51//! A callback-based idiom (`sqry_graph_fields!(my_cb!)`) was also
52//! considered; it worked for the struct declaration but collapsed
53//! under `macro_rules!` hygiene constraints for the destructure-and-
54//! construct clone path (the `self` keyword cannot be synthesised by
55//! a macro; passing identifiers across a callback boundary requires
56//! forwarding them through an `:expr` metavar, which loses the
57//! token-level decomposition needed for Arc-vs-scalar dispatch).
58//!
59//! The **kind-tagged single macro** settled on here expands inline at
60//! the call site — `sqry_graph_fields!(@clone_inner from self)` —
61//! so the destructure runs in the caller's hygiene context, the
62//! `self` keyword is the caller's `self`, and the per-field Arc /
63//! scalar / owned dispatch is baked into the macro body without a
64//! secondary callback hop.
65//!
66//! The single-source-of-truth guarantee is the same: adding a field
67//! requires touching (a) the `CodeGraph` struct in
68//! `concurrent/graph.rs`, (b) the field list inside this macro, and
69//! (c) the step in `finalize()` that compacts it. Missing any of
70//! (a) or (b) makes `clone_for_rebuild_inner` fail to compile.
71//!
72//! # Release-build posture
73//!
74//! The debug bucket-bijection and tombstone-residue checks at steps 13
75//! and 14 are gated on `cfg(any(debug_assertions, test))`. Release
76//! builds pay zero overhead. The §E equivalence harness in CI is the
77//! release-time drift backstop (plan §F "Release build semantics").
78
79// Module-level `#[cfg(feature = "rebuild-internals")]` lives on the
80// `pub mod rebuild_graph;` declaration in the parent `mod.rs`; duplicating
81// it here trips the `duplicated_attributes` clippy lint.
82#![allow(clippy::too_many_lines)]
83
84use std::collections::{HashMap, HashSet};
85
86use crate::confidence::ConfidenceMetadata;
87use crate::graph::error::GraphResult;
88use crate::graph::unified::bind::alias::AliasTable;
89use crate::graph::unified::bind::scope::ScopeArena;
90use crate::graph::unified::bind::scope::provenance::ScopeProvenanceStore;
91use crate::graph::unified::bind::shadow::ShadowTable;
92use crate::graph::unified::concurrent::CodeGraph;
93use crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore;
94use crate::graph::unified::node::NodeId;
95use crate::graph::unified::storage::arena::NodeArena;
96use crate::graph::unified::storage::c_indirect::CIndirectSideTables;
97use crate::graph::unified::storage::edge_provenance::EdgeProvenanceStore;
98use crate::graph::unified::storage::indices::AuxiliaryIndices;
99use crate::graph::unified::storage::interner::StringInterner;
100use crate::graph::unified::storage::metadata::NodeMetadataStore;
101use crate::graph::unified::storage::node_provenance::NodeProvenanceStore;
102use crate::graph::unified::storage::registry::FileRegistry;
103use crate::graph::unified::storage::segment::FileSegmentTable;
104
105use super::coverage::NodeIdBearing;
106
107// =====================================================================
108// Single source of truth: field list
109// =====================================================================
110
111/// Single source of truth for the [`CodeGraph`] field list.
112///
113/// This macro takes a **kind-tag** selector (`@decl_rebuild`,
114/// `@clone_inner from ...`, `@field_names`) and emits a tailored
115/// expansion for each consumer. The field-list rows live exactly once,
116/// inside this macro body, so any new field is added in one place:
117/// here.
118///
119/// # Adding / renaming / removing a field on [`CodeGraph`]
120///
121/// 1. Edit the `CodeGraph` struct in
122///    `sqry-core/src/graph/unified/concurrent/graph.rs` (declaration
123///    order matters — match the order here).
124/// 2. Edit the field list below. The `@decl_rebuild` arm auto-mirrors
125///    into `RebuildGraph`; the `@clone_inner` arm auto-mirrors into
126///    `clone_for_rebuild_inner`.
127/// 3. Extend `RebuildGraph::finalize()` with the step that compacts
128///    the new field (and, if it carries `NodeId`s, add a K.A/K.B row
129///    + `NodeIdBearing` impl in `super::coverage`).
130///
131/// Missing step (1) or (2) is a hard `E0027` compile error in the
132/// `let CodeGraph { .. } = ...` destructure inside the `@clone_inner`
133/// arm. Step (3) is enforced by the §F tombstone-residue check
134/// (Gate 0d) and by the §E incremental-vs-full equivalence harness.
135#[macro_export]
136macro_rules! sqry_graph_fields {
137    // -------------------------------------------------------------
138    // Arm @decl_rebuild: emit the `RebuildGraph` struct declaration.
139    // -------------------------------------------------------------
140    (@decl_rebuild) => {
141        /// Owned, rebuild-local mirror of [`CodeGraph`].
142        ///
143        /// Each field stores a value type rather than an [`Arc`], so
144        /// the rebuild task can mutate freely without touching the
145        /// live published graph. The only path to produce a
146        /// publishable [`CodeGraph`] from a [`RebuildGraph`] is
147        /// [`RebuildGraph::finalize`]; the trybuild fixture at
148        /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
149        /// locks that invariant in.
150        pub struct RebuildGraph {
151            pub(crate) nodes: NodeArena,
152            pub(crate) edges: BidirectionalEdgeStore,
153            pub(crate) strings: StringInterner,
154            pub(crate) files: FileRegistry,
155            pub(crate) indices: AuxiliaryIndices,
156            pub(crate) macro_metadata: NodeMetadataStore,
157            pub(crate) node_provenance: NodeProvenanceStore,
158            pub(crate) edge_provenance: EdgeProvenanceStore,
159            pub(crate) fact_epoch: u64,
160            pub(crate) epoch: u64,
161            pub(crate) confidence: HashMap<String, ConfidenceMetadata>,
162            pub(crate) scope_arena: ScopeArena,
163            pub(crate) alias_table: AliasTable,
164            pub(crate) shadow_table: ShadowTable,
165            pub(crate) scope_provenance_store: ScopeProvenanceStore,
166            pub(crate) file_segments: FileSegmentTable,
167            /// Phase A (U09): C indirect-call resolver side tables.
168            /// `None` outside C workspaces; cloned through the rebuild
169            /// pipeline so an in-flight rebuild preserves the side
170            /// tables already committed on the source graph.
171            pub(crate) c_indirect_tables: Option<CIndirectSideTables>,
172            /// Build-time scratch side-channel for the Go plugin's
173            /// post-Phase-4e `pass_go_method_set_satisfaction` (Cluster
174            /// A of the Go T1 implements-and-promotion design). Mirror
175            /// of `CodeGraph::go_hints`; held by value (no Arc) for
176            /// the same rationale as on `CodeGraph`.
177            pub(crate) go_hints: $crate::graph::unified::build::staging::GoHints,
178            /// Active tombstone set during finalize steps 2–7.
179            /// Populated by `RebuildGraph::remove_file` /
180            /// `RebuildGraph::tombstone` (added by Task 4 Step 4) and
181            /// drained into [`drained_tombstones`] at step 8.
182            pub(crate) tombstones: HashSet<NodeId>,
183            /// Snapshot of [`tombstones`] taken at finalize step 8,
184            /// kept for the debug residue check in step 14.
185            pub(crate) drained_tombstones: HashSet<NodeId>,
186        }
187    };
188
189    // -------------------------------------------------------------
190    // Arm @clone_inner: emit the body of `clone_for_rebuild_inner`.
191    //
192    // The caller passes their `self` binding as `$this:expr` so
193    // macro hygiene preserves the method-local reference. The
194    // destructure is exhaustive — adding a `CodeGraph` field without
195    // also adding it here is a hard `E0027` compile error. Removing
196    // a row here without removing the `CodeGraph` field is an unused-
197    // pattern warning that our `-D warnings` CI setting escalates.
198    // -------------------------------------------------------------
199    (@clone_inner from $this:expr) => {{
200        let CodeGraph {
201            nodes,
202            edges,
203            strings,
204            files,
205            indices,
206            macro_metadata,
207            node_provenance,
208            edge_provenance,
209            fact_epoch,
210            epoch,
211            confidence,
212            scope_arena,
213            alias_table,
214            shadow_table,
215            scope_provenance_store,
216            file_segments,
217            c_indirect_tables,
218            go_hints,
219        } = $this;
220        RebuildGraph {
221            nodes: (**nodes).clone(),
222            edges: (**edges).clone(),
223            strings: (**strings).clone(),
224            files: (**files).clone(),
225            indices: (**indices).clone(),
226            macro_metadata: (**macro_metadata).clone(),
227            node_provenance: (**node_provenance).clone(),
228            edge_provenance: (**edge_provenance).clone(),
229            fact_epoch: *fact_epoch,
230            epoch: *epoch,
231            confidence: confidence.clone(),
232            scope_arena: (**scope_arena).clone(),
233            alias_table: (**alias_table).clone(),
234            shadow_table: (**shadow_table).clone(),
235            scope_provenance_store: (**scope_provenance_store).clone(),
236            file_segments: (**file_segments).clone(),
237            c_indirect_tables: c_indirect_tables.clone(),
238            go_hints: go_hints.clone(),
239            tombstones: ::std::collections::HashSet::new(),
240            drained_tombstones: ::std::collections::HashSet::new(),
241        }
242    }};
243
244    // -------------------------------------------------------------
245    // Arm @field_names: emit a comma-separated list of every field
246    // name. Used by tests that want to assert coverage of field names
247    // against a declared list without re-parsing Rust source.
248    // -------------------------------------------------------------
249    (@field_names) => {
250        &[
251            "nodes",
252            "edges",
253            "strings",
254            "files",
255            "indices",
256            "macro_metadata",
257            "node_provenance",
258            "edge_provenance",
259            "fact_epoch",
260            "epoch",
261            "confidence",
262            "scope_arena",
263            "alias_table",
264            "shadow_table",
265            "scope_provenance_store",
266            "file_segments",
267            "c_indirect_tables",
268            "go_hints",
269        ]
270    };
271}
272
273// =====================================================================
274// RebuildGraph struct declaration (macro-driven)
275// =====================================================================
276
277sqry_graph_fields!(@decl_rebuild);
278
279// =====================================================================
280// clone_for_rebuild: CodeGraph -> RebuildGraph
281// =====================================================================
282
283impl CodeGraph {
284    /// Produce a fresh [`RebuildGraph`] from this graph's current
285    /// committed state, deep-cloning every Arc-wrapped component so the
286    /// returned value is decoupled from future mutations to `self`.
287    ///
288    /// The exhaustive destructure inside the `@clone_inner` arm of
289    /// [`sqry_graph_fields!`] guarantees that every field on
290    /// [`CodeGraph`] is mirrored into the returned [`RebuildGraph`].
291    /// Adding a field to [`CodeGraph`] without also adding it to the
292    /// field list inside `sqry_graph_fields!` is a hard `E0027`
293    /// compile error on the `let CodeGraph { .. } = self;` destructure.
294    ///
295    /// # When to call this
296    ///
297    /// Only the incremental-rebuild dispatcher (Task 4 Step 4) calls
298    /// `clone_for_rebuild`, on the rebuild task's background tokio
299    /// context, against an `Arc<CodeGraph>` freshly obtained via
300    /// `ArcSwap::load_full()`. The Arc's refcount is 1 in the common
301    /// case, so the underlying deep clones amount to `Arc::get_mut`-
302    /// equivalent cost on each component.
303    ///
304    /// # Performance budget (A2 §H, line 734)
305    ///
306    /// On a 384k-node / 1.3M-edge reference graph, this call must
307    /// complete in < 50 ms. The daemon's rebuild-latency benchmark
308    /// tracks the budget; warning threshold 50 ms, hard record
309    /// threshold 200 ms. Exceeding the warning logs but does not fail
310    /// the rebuild.
311    #[must_use]
312    pub fn clone_for_rebuild(&self) -> RebuildGraph {
313        Self::clone_for_rebuild_inner(self)
314    }
315
316    /// Inner implementation, kept separate so the public entrypoint is
317    /// a single-line shim that also makes the exhaustive destructure
318    /// visible at the top of the rebuild surface.
319    ///
320    /// The `@clone_inner from self` macro arm emits the destructure +
321    /// struct-construction inline; passing `self` as an `:expr`
322    /// metavar preserves call-site hygiene so the macro-introduced
323    /// field bindings (`nodes`, `edges`, ...) are resolvable against
324    /// the `self` binding in this method.
325    fn clone_for_rebuild_inner(&self) -> RebuildGraph {
326        sqry_graph_fields!(@clone_inner from self)
327    }
328}
329
330// =====================================================================
331// finalize(): RebuildGraph -> CodeGraph — the 14-step contract
332// =====================================================================
333
334impl RebuildGraph {
335    /// Consume this [`RebuildGraph`] and assemble a publishable
336    /// [`CodeGraph`].
337    ///
338    /// This is the **only** safe path from [`RebuildGraph`] back to
339    /// [`CodeGraph`]. No public API converts a [`RebuildGraph`] into
340    /// an [`Arc<CodeGraph>`] any other way, and no
341    /// `From<RebuildGraph> for CodeGraph` impl exists — the trybuild
342    /// fixture at
343    /// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
344    /// locks that property in.
345    ///
346    /// # The 14 ordered steps (plan §H lines 658–707)
347    ///
348    /// Each numbered comment below traces the matching plan step. In
349    /// debug / test builds, steps 13 and 14 also execute the §F
350    /// bijection and tombstone-residue checks; release builds compile
351    /// them out.
352    ///
353    /// Steps 2–7 uniformly invoke
354    /// [`super::coverage::NodeIdBearing::retain_nodes`] with the
355    /// closure `|nid| !self.tombstones.contains(&nid)`, so adding a new
356    /// K.A or K.B row for a future `NodeId`-bearing container
357    /// automatically extends compaction coverage once the new row's
358    /// `retain_nodes` call is appended to step 7.
359    ///
360    /// # Errors
361    ///
362    /// Returns `Err(GraphBuilderError::Internal{..})` only if a
363    /// compaction primitive fails; all current primitives are
364    /// infallible, so `finalize()` is currently infallible. The `Result`
365    /// return is preserved for future fallible compaction steps without
366    /// a signature change.
367    pub fn finalize(mut self) -> GraphResult<CodeGraph> {
368        // ----------------------------------------------------------------
369        // Step 1 — Freeze the rebuild's interner.
370        //
371        // The plan §H describes step 1 as `new_strings =
372        // self.string_builder.freeze()`. In this codebase the
373        // rebuild-local interner is a [`StringInterner`] (no separate
374        // "builder" type), so the concrete freeze operation is:
375        //
376        //   (a) canonicalise the interner by running
377        //       [`StringInterner::build_dedup_table`]. This guarantees:
378        //         * `lookup_stale == false` (required for all read-path
379        //           accessors); any in-flight bulk-alloc residue from a
380        //           prior failed rebuild is healed.
381        //         * Every string value is backed by exactly one
382        //           canonical slot — no duplicate arcs remain. The
383        //           `ref_counts` of duplicate slots are folded into
384        //           their canonical slot. `StringId` values held by
385        //           live `NodeEntry`s remain valid because this pass
386        //           only collapses slots that are *not* referenced from
387        //           live nodes (nothing in this pass renames already
388        //           live slots).
389        //   (b) rewrite every live `NodeEntry` through the remap table
390        //       produced by (a) so any node whose fields happen to
391        //       point at a now-collapsed duplicate slot is rewired to
392        //       the canonical slot. This preserves the post-freeze
393        //       invariant "every live `NodeEntry` references only
394        //       canonical `StringId`s" even if earlier rebuild steps
395        //       produced duplicate slots.
396        //   (c) prune unreferenced slots with
397        //       [`StringInterner::recycle_unreferenced`] so the frozen
398        //       interner carries only strings the final graph actually
399        //       needs. This is the step that drives
400        //       `interner_live_ratio < interner_compaction_threshold`
401        //       down over time (plan §H line 713) so the next
402        //       housekeeping rebuild observes an accurate live ratio.
403        //
404        // After this block runs, `self.strings` is in a fully
405        // canonical, shared-immutable-ready state. Step 12's
406        // `Arc::new(new_strings)` is then purely the ownership-to-
407        // shared transition; the semantic freeze itself has already
408        // happened here.
409        // ----------------------------------------------------------------
410        let string_remap = self.strings.build_dedup_table();
411        if !string_remap.is_empty() {
412            // Rewrite every live node's interned-string fields through
413            // the remap so freshly-collapsed duplicate slots are not
414            // reachable from any live `NodeEntry`. This is required
415            // for (c) below to be safe to call: `recycle_unreferenced`
416            // only frees slots with `ref_count == 0`, so the refcount
417            // bookkeeping that `build_dedup_table` consolidated into
418            // the canonical slot must be honoured by node-level fields
419            // as well.
420            rewrite_node_entries_through_remap(&mut self.nodes, &string_remap);
421            // Iter-2 B1 (verbatim): `AuxiliaryIndices::name_index` /
422            // `qualified_name_index` are keyed by `StringId`. If
423            // `build_dedup_table()` collapses a key's slot and
424            // `recycle_unreferenced` subsequently frees it, the bucket
425            // keys would dangle. Remap every `StringId`-backed holder
426            // on the rebuild through the same dedup table before the
427            // recycle pass runs, merging any buckets that collapse onto
428            // the same canonical key.
429            //
430            // All remap-capable stores are listed here exhaustively.
431            // Extending this block when a new `CodeGraph` field gains a
432            // `StringId` payload is a code-owner obligation tied to the
433            // `sqry_graph_fields!` macro (plan §H "Placement and feature
434            // gate"). Today, the StringId-bearing surfaces on the
435            // publish-visible graph are:
436            //
437            //   * `AuxiliaryIndices.name_index` / `qualified_name_index`
438            //     (BTreeMap keys — collapse on canonicalisation)
439            //   * `FileRegistry` — `FileEntry.source_uri: Option<StringId>`
440            //     per slot
441            //   * `AliasTable` — `AliasEntry.from_symbol` /
442            //     `to_symbol` per entry; the `(scope, from_symbol)`
443            //     sort key is re-established after rewrite
444            //   * `ShadowTable` — `ShadowEntry.symbol` per entry; the
445            //     `(scope, symbol, byte_offset)` sort key and the
446            //     `chains` range index are rebuilt after rewrite
447            //
448            // `NodeArena` (above) is the fifth surface; it is handled
449            // inline because it is the single largest consumer and
450            // already has a purpose-built helper.
451            //
452            // Iter-3 B1 (verbatim): `BidirectionalEdgeStore` holds
453            // `EdgeKind` instances whose variants — `Imports{alias}`,
454            // `Exports{alias}`, `TypeOf{name}`, `TraitMethodBinding
455            // {trait_name, impl_type}`, `HttpRequest{url}`,
456            // `GrpcCall{service, method}`, `DbQuery{table}`,
457            // `TableRead{table_name, schema}`, `TableWrite{table_name,
458            // schema}`, `TriggeredBy{trigger_name, schema}`,
459            // `MessageQueue{protocol::Other(_), topic}`,
460            // `WebSocket{event}`, `GraphQLOperation{operation}`,
461            // `ProcessExec{command}`, `FileIpc{path_pattern}`,
462            // `ProtocolCall{protocol, metadata}` — carry
463            // `StringId` payloads inside both the steady-state CSR
464            // (`CsrGraph::edge_kind`) and the mutable delta
465            // (`DeltaBuffer` `DeltaEdge::kind`), in **both** the
466            // forward and reverse stores. Before this iter-4 fix,
467            // step 1 left those payloads un-remapped — so after
468            // `recycle_unreferenced` below, those edges would have
469            // dangled onto freed interner slots. The full-build
470            // pipeline recognises this surface explicitly at
471            // `build/entrypoint.rs` (Phase 4b) +
472            // `build/parallel_commit.rs::phase4_apply_global_remap`;
473            // the rebuild pipeline must do the same against
474            // committed storage because, unlike Phase 4b, the edges
475            // are no longer a `Vec<Vec<PendingEdge>>`.
476            //
477            // `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
478            // is the sixth surface on this list. The exhaustive match
479            // on `EdgeKind` lives in
480            // `parallel_commit::remap_edge_kind_string_ids` so adding a
481            // new `StringId`-bearing variant becomes a single source of
482            // truth: a compile error there forces both pipelines to
483            // rewrite it.
484            //
485            // Stores whose data is keyed by `NodeId`/`FileId`/`EdgeId`
486            // only (metadata keyed on NodeId, node / edge provenance,
487            // scope arena indexed by ScopeId, scope provenance,
488            // file-segments keyed on FileId/EdgeId) do not hold
489            // `StringId` payloads and are out of scope for this
490            // rewrite. Any future field that lifts a `StringId` onto
491            // `CodeGraph` must extend this list in the same commit
492            // that declares it, matching the A2 §K discipline.
493            self.indices.rewrite_string_ids_through_remap(&string_remap);
494            self.files.rewrite_string_ids_through_remap(&string_remap);
495            self.alias_table
496                .rewrite_string_ids_through_remap(&string_remap);
497            self.shadow_table
498                .rewrite_string_ids_through_remap(&string_remap);
499            // Iter-4 K.B1 fix: rewrite `StringId` payloads carried by
500            // committed `EdgeKind`s in both forward and reverse stores,
501            // across both CSR and delta tiers. Must run before
502            // `recycle_unreferenced` below so freed slots stay unreferenced.
503            self.edges
504                .rewrite_edge_kind_string_ids_through_remap(&string_remap);
505        }
506        self.strings.recycle_unreferenced();
507
508        // ----------------------------------------------------------------
509        // Step 2 — Compact NodeArena.
510        //
511        // K.A1 row: `NodeArena::retain_nodes` drops every occupied slot
512        // whose NodeId fails the predicate, advancing the slot's
513        // generation so lingering NodeId handles become stale.
514        // ----------------------------------------------------------------
515        {
516            let tombstones = &self.tombstones;
517            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
518                Box::new(move |nid| !tombstones.contains(&nid));
519            self.nodes.retain_nodes(&*predicate);
520        }
521
522        // ----------------------------------------------------------------
523        // Step 3 — Compact BidirectionalEdgeStore (forward + reverse).
524        //
525        // K.A2 + K.A3 rows: `retain_nodes` drops every delta edge with
526        // a tombstoned endpoint in either direction. CSR compaction is
527        // deferred to step 9 (CSR is derived state, rebuilt not mutated).
528        // ----------------------------------------------------------------
529        {
530            let tombstones = &self.tombstones;
531            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
532                Box::new(move |nid| !tombstones.contains(&nid));
533            self.edges.retain_nodes(&*predicate);
534        }
535
536        // ----------------------------------------------------------------
537        // Step 4 — Compact AuxiliaryIndices.
538        //
539        // K.A4–K.A7 rows (kind / name / qualified-name / file indices):
540        // a single `AuxiliaryIndices::retain_nodes` call visits every
541        // inner bucket.
542        // ----------------------------------------------------------------
543        {
544            let tombstones = &self.tombstones;
545            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
546                Box::new(move |nid| !tombstones.contains(&nid));
547            self.indices.retain_nodes(&*predicate);
548        }
549
550        // ----------------------------------------------------------------
551        // Step 5 — Compact NodeMetadataStore.
552        //
553        // K.A8 row: macro / classpath per-node metadata keyed by the
554        // full `(index, generation)` pair.
555        // ----------------------------------------------------------------
556        {
557            let tombstones = &self.tombstones;
558            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
559                Box::new(move |nid| !tombstones.contains(&nid));
560            self.macro_metadata.retain_nodes(&*predicate);
561        }
562
563        // ----------------------------------------------------------------
564        // Step 6 — Compact FileRegistry per-file buckets.
565        //
566        // K.B1 row. Iter-2 B2 (verbatim): this step used to be a
567        // no-op scheduled against a future base-plan Step 1. Pulling
568        // `per_file_nodes: HashMap<FileId, Vec<NodeId>>` forward into
569        // Gate 0c retires the no-op — every `NodeId` the rebuild's
570        // parallel-parse pass committed is already bucketed via
571        // `FileRegistry::record_node`, so this step now does real work:
572        //
573        //   * drop every `NodeId` whose arena slot was tombstoned in
574        //     step 2 (or earlier) via the shared predicate
575        //   * dedup each surviving bucket
576        //   * drop buckets that collapse to empty
577        //
578        // The §F.1 bucket-bijection check at step 13 consumes the
579        // result of this compaction, so a mis-applied predicate here
580        // surfaces as a panic at the publish boundary in debug builds.
581        // ----------------------------------------------------------------
582        {
583            let tombstones = &self.tombstones;
584            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
585                Box::new(move |nid| !tombstones.contains(&nid));
586            self.files.retain_nodes(&*predicate);
587        }
588
589        // ----------------------------------------------------------------
590        // Step 7 — Compact K-list extras.
591        //
592        // Rows K.A10 (node_provenance), K.A11 (scope_arena),
593        // K.A12 (alias_table), K.A13 (shadow_table). Each is a direct
594        // NodeIdBearing::retain_nodes call.
595        //
596        // When a future task lifts a new NodeId-bearing structure onto
597        // `CodeGraph`, extend this step with one additional
598        // `retain_nodes` call and add a K.A/K.B row to
599        // `super::coverage` per the plan §K contract.
600        // ----------------------------------------------------------------
601        {
602            let tombstones = &self.tombstones;
603            let predicate: Box<dyn Fn(NodeId) -> bool + '_> =
604                Box::new(move |nid| !tombstones.contains(&nid));
605            self.node_provenance.retain_nodes(&*predicate);
606            self.scope_arena.retain_nodes(&*predicate);
607            self.alias_table.retain_nodes(&*predicate);
608            self.shadow_table.retain_nodes(&*predicate);
609        }
610
611        // ----------------------------------------------------------------
612        // Step 8 — Drain tombstones into `drained_tombstones`.
613        //
614        // The residue check at step 14 asserts that the finalized
615        // `CodeGraph` does not contain any of these NodeIds in any
616        // NodeId-bearing structure. Keep the drained set until the
617        // check completes; then it is dropped with `self`.
618        // ----------------------------------------------------------------
619        self.drained_tombstones = std::mem::take(&mut self.tombstones);
620
621        // ----------------------------------------------------------------
622        // Step 9 — Rebuild CSR adjacency (both directions).
623        //
624        // CSR is derived state (K.A9 — "CSR adjacency is derived state;
625        // rebuilt from compacted edges — never mutated in place"). After
626        // step 3 filtered the delta tier to live-only endpoints, the
627        // correct operation is to *construct* a fresh CSR from the
628        // compacted delta snapshot and *install* it — not merely drop
629        // the stale cache. This matches plan §H lines 684–691 (the
630        // committed-then-queried graph has a CSR from the first read).
631        //
632        // Build path: `compaction::snapshot_edges` → compaction merges
633        // in-memory delta (no tombstones left after step 3) →
634        // `build_compacted_csr` constructs an immutable `CsrGraph` →
635        // `swap_csrs_and_clear_deltas` installs both directions and
636        // clears the now-absorbed delta buffers. The node_count is the
637        // post-compaction slot count of the arena (not the live count —
638        // CSR uses dense slot indexing; vacant slots appear as nodes
639        // with zero out-edges).
640        //
641        // The plan's example spells this `self.edges.rebuild_csr()` as
642        // shorthand for this sequence; we expand it to concrete calls
643        // so reviewers can audit each primitive. The post-condition is
644        // identical to the plan's: after step 9 the `CodeGraph` about
645        // to be assembled has no CSR referencing a tombstoned NodeId,
646        // because the CSR was built from the already-filtered delta.
647        // ----------------------------------------------------------------
648        {
649            use crate::graph::unified::compaction::{
650                Direction, build_compacted_csr, snapshot_edges,
651            };
652            let node_count = self.nodes.slot_count();
653            // Snapshot the forward/reverse deltas (CSR is still stale
654            // at this point — `build_compacted_csr` uses the delta's
655            // live-only contents). Because step 3 tombstoned endpoints
656            // out of the delta already, the merged build sees only
657            // live edges.
658            let forward_snapshot = {
659                let forward = self.edges.forward();
660                snapshot_edges(&forward, node_count)
661            };
662            let reverse_snapshot = {
663                let reverse = self.edges.reverse();
664                snapshot_edges(&reverse, node_count)
665            };
666            // Build both directions in parallel (matches the pattern
667            // used by `build/entrypoint.rs`).
668            let (forward_csr, reverse_csr) = rayon::join(
669                || build_compacted_csr(&forward_snapshot, Direction::Forward),
670                || build_compacted_csr(&reverse_snapshot, Direction::Reverse),
671            );
672            let (forward_csr, _) =
673                forward_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
674                    reason: format!("rebuild finalize step 9 (forward CSR build): {e}"),
675                })?;
676            let (reverse_csr, _) =
677                reverse_csr.map_err(|e| crate::graph::error::GraphBuilderError::Internal {
678                    reason: format!("rebuild finalize step 9 (reverse CSR build): {e}"),
679                })?;
680            // Install both CSRs and clear the absorbed deltas.
681            self.edges
682                .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
683        }
684
685        // ----------------------------------------------------------------
686        // Step 10 — Per-language confidence update.
687        //
688        // Incremental rebuild must ensure the published confidence map
689        // matches the set of languages that actually have live nodes in
690        // the rebuild's graph. Languages whose only source files were
691        // all removed during this rebuild must not linger in the
692        // confidence surface — they would mislead MCP clients into
693        // believing analysis for that language is still available.
694        //
695        // The rebuild-local `FileRegistry` carries per-file `Language`
696        // tags; we enumerate them, collect the set of languages with at
697        // least one live file, and drop confidence entries for any
698        // language that no longer appears. Entries for still-present
699        // languages are preserved as-is (their `ConfidenceMetadata` is
700        // updated by the plugin-level confidence-ingestion path in
701        // `CodeGraph::merge_confidence`; this step only filters, it
702        // does not fabricate new samples).
703        //
704        // A language entry is preserved when (a) at least one live file
705        // is tagged with that language in the rebuild-local registry,
706        // OR (b) the confidence entry carries a non-default set of
707        // limitations / unavailable-features — that is analysis
708        // metadata the plugin layer deliberately recorded, and dropping
709        // it on a zero-file rebuild would be lossy. Both conditions are
710        // evaluated against the rebuild-local state that will be
711        // published by step 12.
712        // ----------------------------------------------------------------
713        {
714            use std::collections::BTreeSet;
715            let mut active_languages: BTreeSet<String> = BTreeSet::new();
716            for (_file_id, _path, maybe_language) in self.files.iter_with_language() {
717                if let Some(language) = maybe_language {
718                    active_languages.insert(language.to_string());
719                }
720            }
721            self.confidence.retain(|language_key, meta| {
722                if active_languages.contains(language_key) {
723                    true
724                } else {
725                    // Preserve entries that encode deliberate
726                    // plugin-recorded limitations even when no live
727                    // files remain — those are analysis-state facts
728                    // the daemon must not silently drop.
729                    !meta.limitations.is_empty() || !meta.unavailable_features.is_empty()
730                }
731            });
732        }
733
734        // ----------------------------------------------------------------
735        // Step 11 — Epoch bump.
736        //
737        // `prior_epoch` captured at `clone_for_rebuild` time; +1 marks
738        // the publication boundary. Wrapping add matches the existing
739        // `CodeGraph::bump_epoch` behavior at
740        // `sqry-core/src/graph/unified/concurrent/graph.rs`.
741        // ----------------------------------------------------------------
742        let new_epoch = self.epoch.wrapping_add(1);
743
744        // ----------------------------------------------------------------
745        // Step 12 — Assemble the immutable `CodeGraph`.
746        //
747        // Every Arc-wrapped field is freshly wrapped from the
748        // rebuild-local owned value, so the new `CodeGraph` has no
749        // Arc-sharing relationship with the pre-rebuild snapshot. This
750        // is the point at which the rebuild-local interner becomes
751        // immutable (step 1's freeze).
752        // ----------------------------------------------------------------
753        let mut graph = CodeGraph::__assemble_from_rebuild_parts_internal(
754            self.nodes,
755            self.edges,
756            self.strings,
757            self.files,
758            self.indices,
759            self.macro_metadata,
760            self.node_provenance,
761            self.edge_provenance,
762            self.fact_epoch,
763            new_epoch,
764            self.confidence,
765            self.scope_arena,
766            self.alias_table,
767            self.shadow_table,
768            self.scope_provenance_store,
769            self.file_segments,
770            self.go_hints,
771        );
772        // Phase A (U09): the assembled `CodeGraph` resets
773        // `c_indirect_tables` to `None`. Re-install the rebuild's owned
774        // value so an in-flight rebuild preserves side tables already
775        // committed on the source graph.
776        graph.set_c_indirect_tables(self.c_indirect_tables);
777
778        // ----------------------------------------------------------------
779        // Steps 13 + 14 — (debug) Publish-boundary invariants.
780        //
781        // Per plan §F.3 "single source of truth", both the §F.1 bucket
782        // bijection and the §F.2 tombstone-residue checks fire through
783        // the canonical `publish::assert_publish_invariants` helper.
784        // That helper is also called at the full-rebuild end
785        // (`build_unified_graph_inner`), by Task 6's
786        // `WorkspaceManager::publish_graph`, and by every §E harness
787        // iteration — so any invariant drift surfaces in all four
788        // places simultaneously, not in a subset.
789        //
790        // The §F.2 assertion is the **single site** against
791        // `self.drained_tombstones` (populated at step 8); we do not
792        // run the residue check anywhere else.
793        // ----------------------------------------------------------------
794        #[cfg(any(debug_assertions, test))]
795        crate::graph::unified::publish::assert_publish_invariants(&graph, &self.drained_tombstones);
796
797        Ok(graph)
798    }
799
800    /// Returns the number of NodeIds currently staged for tombstoning
801    /// via [`RebuildGraph::tombstone`] (to be added by Task 4 Step 4).
802    /// Useful for Gate 0c tests that exercise finalize with an empty
803    /// or non-empty tombstone set.
804    #[must_use]
805    pub fn pending_tombstone_count(&self) -> usize {
806        self.tombstones.len()
807    }
808
809    /// Shared-reference accessor for the rebuild-local
810    /// [`NodeArena`]. Read-only; the writer is the rebuild dispatcher
811    /// which holds `&mut self`.
812    ///
813    /// Used by sqry-daemon's `WorkspaceManager` (Task 6) and by the
814    /// Task 4 Step 4 scale test to inspect arena state between
815    /// [`remove_file`](Self::remove_file) calls without going through
816    /// the pub(crate) field directly.
817    #[must_use]
818    pub fn nodes(&self) -> &NodeArena {
819        &self.nodes
820    }
821
822    /// Shared-reference accessor for the rebuild-local
823    /// [`FileRegistry`]. Read-only. The rebuild dispatcher writes via
824    /// `&mut self` on [`remove_file`](Self::remove_file) and
825    /// finalization.
826    #[must_use]
827    pub fn files(&self) -> &FileRegistry {
828        &self.files
829    }
830
831    /// Shared-reference accessor for the rebuild-local
832    /// [`FileSegmentTable`]. Read-only.
833    ///
834    /// `FileSegmentTable` is a plain `Vec<Option<FileSegment>>` with no
835    /// interior mutability (see
836    /// `sqry-core/src/graph/unified/storage/segment.rs`), so this
837    /// accessor is genuinely read-only. Writers ride through the
838    /// `&mut self` mutation path inside
839    /// [`remove_file`](Self::remove_file) and
840    /// [`finalize`](Self::finalize).
841    ///
842    /// Added as part of the iter-1 Codex review fix for Task 4
843    /// Steps 2-3: the `remove_file` integration tests need to assert
844    /// that a file's segment entry is cleared after removal, closing
845    /// the FileId-recycle stale-range bug documented at
846    /// `sqry-core/tests/incremental_remove_file_scale.rs`.
847    #[must_use]
848    pub fn file_segments(&self) -> &FileSegmentTable {
849        &self.file_segments
850    }
851
852    // Note (iter-1 review fix): a `pub fn edges(&self) -> &BidirectionalEdgeStore`
853    // accessor was removed. `BidirectionalEdgeStore` exposes
854    // `forward_mut(&self)` and `reverse_mut(&self)` (interior mutability),
855    // so returning `&BidirectionalEdgeStore` from `&self` would let
856    // external callers escalate a "read-only" handle into a writer. The
857    // rebuild contract requires that edge mutation only happens inside
858    // `RebuildGraph::remove_file` / `finalize` on `&mut self`. If a
859    // read-only edge-view accessor is ever needed by a legitimate
860    // consumer, add a wrapper struct (e.g., `BidirectionalEdgeStoreView`)
861    // that exposes only `forward()` / `reverse()` / `edges_from` /
862    // `edges_to` / `stats` on `&self` — never the `*_mut` methods.
863
864    /// Stage a NodeId for tombstoning during the next `finalize` pass.
865    ///
866    /// Gate 0c ships this helper so finalize tests can drive the
867    /// 14-step contract with a realistic tombstone set without waiting
868    /// for Task 4 Step 4's `remove_file` plumbing. Production callers
869    /// will route through `RebuildGraph::remove_file` (Task 4 Step 4),
870    /// which internally populates the same set via
871    /// `FileRegistry::take_nodes`.
872    pub fn tombstone(&mut self, id: NodeId) {
873        self.tombstones.insert(id);
874    }
875
876    /// Stage every `NodeId` in `ids` for tombstoning during the next
877    /// `finalize` pass. Equivalent to calling [`tombstone`](Self::tombstone)
878    /// for each id, but expresses the bulk intent at the call site.
879    ///
880    /// Used by [`remove_file`](Self::remove_file) to fold every
881    /// file-local NodeId into the staged set in a single pass.
882    pub(crate) fn tombstone_many<I: IntoIterator<Item = NodeId>>(&mut self, ids: I) {
883        self.tombstones.extend(ids);
884    }
885
886    /// Drain every `NodeId` belonging to `file_id`, invalidate every
887    /// rebuild-local edge whose source or target is one of those nodes
888    /// (across both forward and reverse CSR + delta tiers of the
889    /// rebuild-local edge store), drop the file's entry from the
890    /// rebuild-local [`FileRegistry`], and return the list of
891    /// tombstoned [`NodeId`]s.
892    ///
893    /// This is the rebuild-side mirror of
894    /// [`super::super::concurrent::CodeGraph::remove_file`]. Whereas
895    /// the `CodeGraph` variant mutates a live publishable graph in
896    /// place (O(1) publish once the caller wraps it in an `Arc`), this
897    /// variant operates on the owned, pre-finalize
898    /// [`RebuildGraph`] state — so the edge-store tombstones land in
899    /// the CSR's tombstone bitmap and the delta buffer, and are later
900    /// physically purged by [`finalize`](Self::finalize) step 9 when
901    /// the CSR is rebuilt from the compacted delta (§H lines 684–691).
902    ///
903    /// The method does **not** rewrite `NodeIdBearing` surfaces on the
904    /// rebuild (NodeArena, AuxiliaryIndices, NodeMetadataStore,
905    /// NodeProvenanceStore, ScopeArena, AliasTable, ShadowTable) —
906    /// those surfaces are compacted once, uniformly, by `finalize()`
907    /// steps 2–7 against the accumulated `tombstones` set. Running a
908    /// partial compaction here would duplicate work and violate the
909    /// plan's "compact once at step N" contract.
910    ///
911    /// Instead, the tombstoned `NodeId`s are accumulated in
912    /// `self.tombstones` via [`tombstone_many`](Self::tombstone_many).
913    /// Successive `remove_file` calls against different files
914    /// accumulate into the same set; `finalize()` then sweeps every
915    /// K.A/K.B surface once with the union predicate.
916    ///
917    /// # What is tombstoned immediately
918    ///
919    /// Only two surfaces are mutated before `finalize()` runs:
920    ///
921    /// * **NodeArena**: each file-local `NodeId` is `remove`d so the
922    ///   slot's generation advances and stale handles cannot alias a
923    ///   re-allocation. Downstream compaction at step 2 is then a
924    ///   no-op for these slots (idempotent; the arena skips stale
925    ///   generations).
926    /// * **Edge store** (both forward + reverse, both CSR + delta):
927    ///   [`BidirectionalEdgeStore::tombstone_edges_for_nodes`] kills
928    ///   every edge whose source or target is one of the drained
929    ///   NodeIds. CSR tombstones land in `csr_tombstones`; delta
930    ///   edges are dropped outright. Step 9 of `finalize` then
931    ///   rebuilds a fresh CSR from the tombstone-free delta, so the
932    ///   CSR tombstones become physically invisible by publish time.
933    /// * **FileRegistry**: the bucket is drained via
934    ///   [`FileRegistry::take_nodes`] and the file entry is
935    ///   deregistered via [`FileRegistry::unregister`]. Both are
936    ///   idempotent on an already-removed file.
937    ///
938    /// # Returned value
939    ///
940    /// The list of NodeIds that were staged for tombstoning. Empty on
941    /// an unknown or already-removed file. Useful for Gate 0c finalize
942    /// tests that need to assert on bucket-drain correctness or on the
943    /// union of staged tombstones across several `remove_file` calls.
944    ///
945    /// # Idempotency
946    ///
947    /// Calling twice with the same `file_id` is a no-op on the second
948    /// call. The bucket is already drained, `take_nodes` returns an
949    /// empty `Vec`, the rest of the method short-circuits, and the
950    /// `tombstones` set is unchanged.
951    #[allow(dead_code)] // Intra-crate consumer is Task 4 Step 4
952    // (`incremental_rebuild`); external consumer is sqry-daemon's
953    // Task 6 `WorkspaceManager` via the feature-gated `pub use` of
954    // `RebuildGraph`. The visibility is `pub` (not `pub(crate)`) so
955    // the daemon can drive file removals through the rebuild plane,
956    // which is the canonical path for file-deletion events per §F.2.
957    pub fn remove_file(&mut self, file_id: super::super::file::FileId) -> Vec<NodeId> {
958        // Drain the rebuild-local FileRegistry bucket.
959        let tombstoned: Vec<NodeId> = self.files.take_nodes(file_id);
960        // Deregister the file entry unconditionally (idempotent).
961        self.files.unregister(file_id);
962        // Clear the rebuild-local `FileSegmentTable` entry for this
963        // file (idempotent — `remove` no-ops on unknown ids). This
964        // matches `CodeGraph::remove_file`'s behaviour: finalize()
965        // publishes `self.file_segments` verbatim at step 12, so any
966        // stale entry leaked here would survive into the assembled
967        // `CodeGraph`. Because `FileRegistry::unregister` recycles
968        // `FileId` slots, a leaked segment would later alias a
969        // different file's node range after the slot is reissued —
970        // which would cause `reindex_files` to tombstone the wrong
971        // range. Clearing the segment at remove time is the only
972        // defence that closes both the "finalize publishes stale
973        // segment" path AND the "FileId recycle attaches stale
974        // range" path.
975        self.file_segments.remove(file_id);
976
977        if tombstoned.is_empty() {
978            return tombstoned;
979        }
980
981        let dead: std::collections::HashSet<NodeId> = tombstoned.iter().copied().collect();
982
983        // 1. Tombstone each arena slot. `NodeArena::remove` is
984        //    idempotent against stale generations, so retriggering a
985        //    file removal after finalize is safe.
986        for &nid in &tombstoned {
987            let _ = self.nodes.remove(nid);
988        }
989
990        // 2. Invalidate edges across both CSR + delta in both
991        //    directions on the rebuild-local edge store.
992        self.edges.tombstone_edges_for_nodes(&dead);
993
994        // 3. Stage the drained NodeIds for the finalize-time K.A/K.B
995        //    compaction sweep. finalize() will consume these at steps
996        //    2–7 through the NodeIdBearing::retain_nodes predicate.
997        self.tombstone_many(tombstoned.iter().copied());
998
999        tombstoned
1000    }
1001
1002    /// Returns the rebuild-local epoch captured at `clone_for_rebuild`.
1003    #[must_use]
1004    pub fn prior_epoch(&self) -> u64 {
1005        self.epoch
1006    }
1007
1008    /// Pre-finalize tombstone-residue check on the rebuild-state
1009    /// structures themselves (plan §F.2 literal named API).
1010    ///
1011    /// Iterates every `NodeIdBearing` field on this `RebuildGraph` and
1012    /// panics if any contains a NodeId present in `self.tombstones`.
1013    /// This is a diagnostic helper for mid-rebuild consistency checks:
1014    /// e.g., Task 4 Step 4's `RebuildGraph::remove_file` can call this
1015    /// after tombstoning a file but before more closure files land, to
1016    /// prove the just-tombstoned NodeIds really left every index before
1017    /// the next pass commits. The `RebuildGraph::finalize` flow then
1018    /// re-asserts against the drained set on the *assembled* `CodeGraph`
1019    /// at step 14 — those are two different snapshots of the same
1020    /// invariant; both must hold.
1021    ///
1022    /// No-op when `self.tombstones` is empty or in release builds.
1023    ///
1024    /// Does **not** replace the step-14 call site; the single source of
1025    /// truth for the publish-boundary residue check remains
1026    /// `crate::graph::unified::publish::assert_publish_invariants`. This
1027    /// helper exists so pre-finalize call sites (Task 4 Step 4's
1028    /// post-remove sanity check, incremental-engine debug probes, Gate
1029    /// 0d negative tests) can name the assertion without rolling their
1030    /// own iteration.
1031    #[cfg(any(debug_assertions, test))]
1032    pub fn assert_no_tombstone_residue(&self) {
1033        use super::coverage::NodeIdBearing;
1034        let dead = &self.tombstones;
1035        if dead.is_empty() {
1036            return;
1037        }
1038        for nid in self.nodes.all_node_ids() {
1039            assert!(
1040                !dead.contains(&nid),
1041                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in NodeArena"
1042            );
1043        }
1044        for nid in self.edges.all_node_ids() {
1045            assert!(
1046                !dead.contains(&nid),
1047                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in edge store"
1048            );
1049        }
1050        for nid in self.indices.all_node_ids() {
1051            assert!(
1052                !dead.contains(&nid),
1053                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in auxiliary indices"
1054            );
1055        }
1056        for nid in self.macro_metadata.all_node_ids() {
1057            assert!(
1058                !dead.contains(&nid),
1059                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in macro metadata"
1060            );
1061        }
1062        for nid in self.node_provenance.all_node_ids() {
1063            assert!(
1064                !dead.contains(&nid),
1065                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in node provenance"
1066            );
1067        }
1068        for nid in self.scope_arena.all_node_ids() {
1069            assert!(
1070                !dead.contains(&nid),
1071                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in scope arena"
1072            );
1073        }
1074        for nid in self.alias_table.all_node_ids() {
1075            assert!(
1076                !dead.contains(&nid),
1077                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in alias table"
1078            );
1079        }
1080        for nid in self.shadow_table.all_node_ids() {
1081            assert!(
1082                !dead.contains(&nid),
1083                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in shadow table"
1084            );
1085        }
1086        for nid in self.files.all_node_ids() {
1087            assert!(
1088                !dead.contains(&nid),
1089                "RebuildGraph::assert_no_tombstone_residue: tombstone {nid:?} still in per-file bucket"
1090            );
1091        }
1092    }
1093}
1094
1095// =====================================================================
1096// Step 1 helper: rewrite every live `NodeEntry`'s interned-string
1097// fields through the dedup-remap table produced by
1098// `StringInterner::build_dedup_table()`.
1099// =====================================================================
1100
1101/// Rewrite every `NodeEntry` field that holds a [`crate::graph::unified::string::id::StringId`]
1102/// through `remap` so no live node points at a collapsed-duplicate
1103/// slot. Called from finalize step 1 after the interner has been
1104/// canonicalised; a no-op when `remap` is empty (the common case of a
1105/// no-op rebuild).
1106///
1107/// Fields rewritten: `name`, `signature`, `doc`, `qualified_name`,
1108/// `visibility`. These are every `StringId` / `Option<StringId>` field
1109/// that `NodeEntry` currently declares. Adding a new `StringId`-valued
1110/// field to `NodeEntry` requires extending this helper — the closest
1111/// enforcement is the compile-fail coverage in the `sqry_graph_fields!`
1112/// macro which forces a new `RebuildGraph` field and a matching
1113/// finalize step for any new `CodeGraph` field; individual arena
1114/// fields are caught at review time against this helper.
1115fn rewrite_node_entries_through_remap(
1116    nodes: &mut NodeArena,
1117    remap: &HashMap<
1118        crate::graph::unified::string::id::StringId,
1119        crate::graph::unified::string::id::StringId,
1120    >,
1121) {
1122    if remap.is_empty() {
1123        return;
1124    }
1125    for (_id, entry) in nodes.iter_mut() {
1126        if let Some(&canon) = remap.get(&entry.name) {
1127            entry.name = canon;
1128        }
1129        if let Some(sid) = entry.signature
1130            && let Some(&canon) = remap.get(&sid)
1131        {
1132            entry.signature = Some(canon);
1133        }
1134        if let Some(sid) = entry.doc
1135            && let Some(&canon) = remap.get(&sid)
1136        {
1137            entry.doc = Some(canon);
1138        }
1139        if let Some(sid) = entry.qualified_name
1140            && let Some(&canon) = remap.get(&sid)
1141        {
1142            entry.qualified_name = Some(canon);
1143        }
1144        if let Some(sid) = entry.visibility
1145            && let Some(&canon) = remap.get(&sid)
1146        {
1147            entry.visibility = Some(canon);
1148        }
1149    }
1150}
1151
1152// =====================================================================
1153// Assembly path notes (A2 §H "Type-enforced publish path")
1154// =====================================================================
1155//
1156// The only route from `RebuildGraph` to `Arc<CodeGraph>` is:
1157//
1158//     let code_graph = rebuild.finalize()?;            // Step 12 assembles CodeGraph
1159//     let arc = Arc::new(code_graph);                  // caller wraps (publish site)
1160//
1161// The assembly inside `finalize` uses
1162// `CodeGraph::__assemble_from_rebuild_parts_internal`, which is
1163// `pub(crate)` on `CodeGraph`. That means downstream crates — even
1164// `sqry-daemon` with `rebuild-internals` enabled — cannot call the
1165// assembler directly; they must route through `finalize()`, which
1166// guarantees the 14-step compaction sequence runs first.
1167//
1168// The trybuild fixture
1169// `sqry-core/tests/rebuild_internals_compile_fail/rebuild_graph_no_public_assembly.rs`
1170// exercises this: a downstream crate attempting to call
1171// `__assemble_from_rebuild_parts_internal` fails with E0603 ("function
1172// is private"), and attempting to `impl From<RebuildGraph> for CodeGraph`
1173// from outside this module fails with E0117 (orphan-rule violation).
1174
1175// =====================================================================
1176// Tests
1177// =====================================================================
1178
1179#[cfg(test)]
1180mod tests {
1181    use super::*;
1182    use crate::graph::unified::file::FileId;
1183    use crate::graph::unified::node::NodeKind;
1184    use crate::graph::unified::storage::arena::NodeEntry;
1185    use crate::graph::unified::storage::metadata::MacroNodeMetadata;
1186
1187    /// Seed a `CodeGraph` with a handful of live nodes distributed over
1188    /// two files so the per-field compaction steps have something
1189    /// non-trivial to operate on.
1190    fn seeded_graph() -> (CodeGraph, NodeId, NodeId, NodeId) {
1191        let mut graph = CodeGraph::new();
1192        let file_a = FileId::new(1);
1193        let file_b = FileId::new(2);
1194        let sym = graph.strings_mut().intern("symbol_a").expect("intern a");
1195        let node_a;
1196        let node_b;
1197        let node_c;
1198        {
1199            let arena = graph.nodes_mut();
1200            node_a = arena
1201                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
1202                .expect("alloc a");
1203            node_b = arena
1204                .alloc(NodeEntry::new(NodeKind::Method, sym, file_a))
1205                .expect("alloc b");
1206            node_c = arena
1207                .alloc(NodeEntry::new(NodeKind::Struct, sym, file_b))
1208                .expect("alloc c");
1209        }
1210        graph
1211            .indices_mut()
1212            .add(node_a, NodeKind::Function, sym, Some(sym), file_a);
1213        graph
1214            .indices_mut()
1215            .add(node_b, NodeKind::Method, sym, Some(sym), file_a);
1216        graph
1217            .indices_mut()
1218            .add(node_c, NodeKind::Struct, sym, Some(sym), file_b);
1219        graph
1220            .macro_metadata_mut()
1221            .insert(node_a, MacroNodeMetadata::default());
1222        (graph, node_a, node_b, node_c)
1223    }
1224
1225    #[test]
1226    fn clone_for_rebuild_copies_every_field_without_arc_sharing() {
1227        let (graph, _a, _b, _c) = seeded_graph();
1228        let rebuild = graph.clone_for_rebuild();
1229
1230        // Field-level counts match the source graph.
1231        assert_eq!(rebuild.nodes.len(), graph.nodes().len());
1232        assert_eq!(rebuild.macro_metadata.len(), graph.macro_metadata().len());
1233        // The rebuild owns its own arena (not an Arc share), so
1234        // mutating it in place would not affect the source graph. We
1235        // verify this by tombstoning a node in the rebuild and
1236        // confirming the source is unchanged.
1237        let mut rebuild = rebuild;
1238        let ids: Vec<NodeId> = rebuild.nodes.all_node_ids().collect();
1239        let victim = *ids.first().expect("at least one node");
1240        rebuild.tombstone(victim);
1241        assert_eq!(rebuild.pending_tombstone_count(), 1);
1242        // Source graph unaffected.
1243        assert_eq!(graph.nodes().len(), ids.len());
1244    }
1245
1246    #[test]
1247    fn clone_for_rebuild_preserves_epoch_and_fact_epoch() {
1248        let (mut graph, _, _, _) = seeded_graph();
1249        graph.set_epoch(7);
1250        let rebuild = graph.clone_for_rebuild();
1251        assert_eq!(rebuild.prior_epoch(), 7);
1252        assert_eq!(rebuild.fact_epoch, graph.fact_epoch());
1253    }
1254
1255    // ---- Step-level finalize tests ---------------------------------
1256
1257    #[test]
1258    fn finalize_step11_bumps_epoch_by_one() {
1259        let (mut graph, _, _, _) = seeded_graph();
1260        graph.set_epoch(41);
1261        let rebuild = graph.clone_for_rebuild();
1262        let finalized = rebuild.finalize().expect("finalize ok");
1263        assert_eq!(finalized.epoch(), 42);
1264    }
1265
1266    #[test]
1267    fn finalize_step8_drains_tombstones_into_drained_set() {
1268        // We use a reach-into-guts test: finalize consumes self, so we
1269        // verify the drain via a direct RebuildGraph constructed inside
1270        // this module (which has crate visibility into the field).
1271        let (graph, a, _b, _c) = seeded_graph();
1272        let mut rebuild = graph.clone_for_rebuild();
1273        rebuild.tombstone(a);
1274        assert_eq!(rebuild.pending_tombstone_count(), 1);
1275        // After finalize, the graph must not contain `a`.
1276        let finalized = rebuild.finalize().expect("finalize ok");
1277        assert!(
1278            finalized.nodes().get(a).is_none(),
1279            "tombstoned node must be gone from arena"
1280        );
1281    }
1282
1283    #[test]
1284    fn finalize_steps_2_and_4_compact_arena_and_indices_consistently() {
1285        let (graph, a, _b, c) = seeded_graph();
1286        let mut rebuild = graph.clone_for_rebuild();
1287        rebuild.tombstone(a);
1288        rebuild.tombstone(c);
1289        let finalized = rebuild.finalize().expect("finalize ok");
1290
1291        // Arena compaction (step 2): dropped nodes are gone.
1292        assert!(finalized.nodes().get(a).is_none());
1293        assert!(finalized.nodes().get(c).is_none());
1294        // Step 4 must compact the auxiliary indices in lockstep, so
1295        // the tombstoned ids do not linger in any index.
1296        use crate::graph::unified::storage::metadata::TypedMetadata;
1297        let _ = TypedMetadata::Macro(MacroNodeMetadata::default()); // silence unused import warning on some cfgs
1298        assert!(!finalized.indices().by_kind(NodeKind::Function).contains(&a));
1299        assert!(!finalized.indices().by_kind(NodeKind::Struct).contains(&c));
1300    }
1301
1302    #[test]
1303    fn finalize_step5_compacts_macro_metadata() {
1304        let (graph, a, _b, _c) = seeded_graph();
1305        let mut rebuild = graph.clone_for_rebuild();
1306        rebuild.tombstone(a);
1307        let finalized = rebuild.finalize().expect("finalize ok");
1308        // `a` had macro metadata seeded; after finalize it must be gone.
1309        assert!(finalized.macro_metadata().get_macro(a).is_none());
1310    }
1311
1312    #[test]
1313    fn finalize_step9_installs_rebuilt_csr() {
1314        // Step 9 (plan §H lines 684–691) must install a freshly built
1315        // CSR — not merely drop the stale cache. The assembled graph's
1316        // forward and reverse edge stores must both expose a `csr()`
1317        // after finalize, and the deltas must be empty (absorbed by
1318        // the swap).
1319        let (graph, _, _, _) = seeded_graph();
1320        let rebuild = graph.clone_for_rebuild();
1321        let finalized = rebuild.finalize().expect("finalize ok");
1322        assert!(
1323            finalized.edges().forward().csr().is_some(),
1324            "step 9 must install forward CSR"
1325        );
1326        assert!(
1327            finalized.edges().reverse().csr().is_some(),
1328            "step 9 must install reverse CSR"
1329        );
1330        // Deltas were absorbed by the swap.
1331        assert_eq!(finalized.edges().forward().delta_count(), 0);
1332        assert_eq!(finalized.edges().reverse().delta_count(), 0);
1333    }
1334
1335    #[test]
1336    fn finalize_step10_drops_confidence_for_removed_languages() {
1337        // Seed a graph with Rust + Python confidence; the rebuild-local
1338        // FileRegistry has zero files tagged (seeded_graph does not
1339        // register paths), so step 10 must drop both.
1340        let (mut graph, _, _, _) = seeded_graph();
1341        graph.merge_confidence(
1342            "rust",
1343            crate::confidence::ConfidenceMetadata {
1344                level: crate::confidence::ConfidenceLevel::Verified,
1345                ..Default::default()
1346            },
1347        );
1348        graph.merge_confidence(
1349            "python",
1350            crate::confidence::ConfidenceMetadata {
1351                level: crate::confidence::ConfidenceLevel::Partial,
1352                ..Default::default()
1353            },
1354        );
1355        assert_eq!(graph.confidence().len(), 2);
1356
1357        let rebuild = graph.clone_for_rebuild();
1358        let finalized = rebuild.finalize().expect("finalize ok");
1359        assert!(
1360            finalized.confidence().is_empty(),
1361            "step 10 must drop confidence entries for languages that \
1362             have no live files and no recorded limitations"
1363        );
1364    }
1365
1366    #[test]
1367    fn finalize_step10_preserves_confidence_with_limitations() {
1368        // A confidence entry that encodes deliberate limitations must
1369        // survive a zero-file rebuild: dropping it would silently lose
1370        // plugin-recorded analysis state.
1371        let (mut graph, _, _, _) = seeded_graph();
1372        graph.merge_confidence(
1373            "rust",
1374            crate::confidence::ConfidenceMetadata {
1375                level: crate::confidence::ConfidenceLevel::AstOnly,
1376                limitations: vec!["no rust-analyzer".to_string()],
1377                unavailable_features: vec!["type inference".to_string()],
1378            },
1379        );
1380        let rebuild = graph.clone_for_rebuild();
1381        let finalized = rebuild.finalize().expect("finalize ok");
1382        assert_eq!(finalized.confidence().len(), 1);
1383        assert!(finalized.confidence().contains_key("rust"));
1384    }
1385
1386    #[test]
1387    fn finalize_step1_canonicalises_interner_via_dedup() {
1388        // Freeze step must leave the interner's lookup consistent
1389        // (non-stale) and zero duplicate slots after finalize.
1390        let (graph, _, _, _) = seeded_graph();
1391        let rebuild = graph.clone_for_rebuild();
1392        let finalized = rebuild.finalize().expect("finalize ok");
1393        assert!(
1394            !finalized.strings().is_lookup_stale(),
1395            "step 1 freeze must leave lookup_stale == false"
1396        );
1397    }
1398
1399    #[test]
1400    fn finalize_is_infallible_on_empty_tombstone_set() {
1401        let (graph, _, _, _) = seeded_graph();
1402        let rebuild = graph.clone_for_rebuild();
1403        let result = rebuild.finalize();
1404        assert!(result.is_ok(), "empty-tombstone finalize must succeed");
1405        let finalized = result.unwrap();
1406        // Every node survives.
1407        assert_eq!(finalized.nodes().len(), 3);
1408    }
1409
1410    #[test]
1411    fn finalize_survives_interner_snapshot_unchanged_when_no_edits() {
1412        let (graph, _, _, _) = seeded_graph();
1413        let prior_string_count = graph.strings().len();
1414        let rebuild = graph.clone_for_rebuild();
1415        let finalized = rebuild.finalize().expect("finalize ok");
1416        assert_eq!(
1417            finalized.strings().len(),
1418            prior_string_count,
1419            "freeze step must preserve string count across a no-op rebuild"
1420        );
1421    }
1422
1423    #[test]
1424    fn rebuild_graph_pending_tombstone_count_is_accurate() {
1425        let (graph, a, b, c) = seeded_graph();
1426        let mut rebuild = graph.clone_for_rebuild();
1427        assert_eq!(rebuild.pending_tombstone_count(), 0);
1428        rebuild.tombstone(a);
1429        rebuild.tombstone(b);
1430        rebuild.tombstone(c);
1431        // Inserting the same id again is idempotent.
1432        rebuild.tombstone(a);
1433        assert_eq!(rebuild.pending_tombstone_count(), 3);
1434    }
1435
1436    // ----------------------------------------------------------------
1437    // Iter-2 B1: StringId remap across every StringId-backed holder.
1438    // ----------------------------------------------------------------
1439
1440    /// Construct a RebuildGraph whose interner has intentional duplicate
1441    /// `StringId` slots (two slots containing the same canonical
1442    /// "symbol_dup" bytes), with the **live** state referencing both
1443    /// slots. `build_dedup_table()` must collapse them, step 1 must
1444    /// rewrite every StringId-backed surface through the remap, and
1445    /// step 4 must leave no bucket pointing at a recycled StringId.
1446    #[test]
1447    fn step1_remaps_auxiliary_indices_keys_through_dedup_table() {
1448        use crate::graph::unified::storage::interner::StringInterner;
1449
1450        let mut graph = CodeGraph::new();
1451        // Intern "alpha" twice via the bulk path so the two slots are
1452        // NOT coalesced at intern time. We emulate that by manually
1453        // using `alloc_range` + direct bulk_slices_mut assignment on
1454        // the interner.
1455        let interner: &mut StringInterner = graph.strings_mut();
1456        let start = interner.alloc_range(2).expect("alloc range");
1457        {
1458            let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1459            s_slots[0] = Some(std::sync::Arc::from("alpha"));
1460            s_slots[1] = Some(std::sync::Arc::from("alpha"));
1461            rc_slots[0] = 1;
1462            rc_slots[1] = 1;
1463        }
1464        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1465        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1466        let file = FileId::new(1);
1467
1468        // Allocate one node that references the duplicate StringId so
1469        // after dedup the arena still points at a canonical id.
1470        let mut entry = NodeEntry::new(NodeKind::Function, id_dup, file);
1471        entry.qualified_name = Some(id_dup);
1472        let node = graph.nodes_mut().alloc(entry).expect("alloc");
1473        graph
1474            .indices_mut()
1475            .add(node, NodeKind::Function, id_dup, Some(id_dup), file);
1476
1477        let rebuild = graph.clone_for_rebuild();
1478        let finalized = rebuild.finalize().expect("finalize ok");
1479
1480        // After step 1 the duplicate slot must be recycled and every
1481        // live surface must reference the canonical slot.
1482        // (a) Arena field references canonical
1483        let entry = finalized.nodes().get(node).expect("node survives");
1484        assert_eq!(entry.name, id_canon, "NodeEntry.name must be canonicalised");
1485        assert_eq!(
1486            entry.qualified_name,
1487            Some(id_canon),
1488            "NodeEntry.qualified_name must be canonicalised"
1489        );
1490        // (b) AuxiliaryIndices: by_name must find the node under canonical id.
1491        assert!(
1492            finalized.indices().by_name(id_canon).contains(&node),
1493            "indices.by_name under canonical StringId must contain the node"
1494        );
1495        // (c) And must NOT have any bucket under the duplicate StringId.
1496        assert!(
1497            finalized.indices().by_name(id_dup).is_empty(),
1498            "duplicate StringId bucket must be empty after remap"
1499        );
1500        assert!(
1501            finalized.indices().by_qualified_name(id_dup).is_empty(),
1502            "duplicate StringId qname bucket must be empty after remap"
1503        );
1504        // (d) Interner has no stale lookup.
1505        assert!(!finalized.strings().is_lookup_stale());
1506    }
1507
1508    #[test]
1509    fn step1_merges_aux_indices_buckets_when_keys_collapse() {
1510        use crate::graph::unified::storage::interner::StringInterner;
1511
1512        let mut graph = CodeGraph::new();
1513        let interner: &mut StringInterner = graph.strings_mut();
1514        let start = interner.alloc_range(2).expect("alloc range");
1515        {
1516            let (s_slots, rc_slots) = interner.bulk_slices_mut(start, 2);
1517            s_slots[0] = Some(std::sync::Arc::from("beta"));
1518            s_slots[1] = Some(std::sync::Arc::from("beta"));
1519            rc_slots[0] = 1;
1520            rc_slots[1] = 1;
1521        }
1522        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1523        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1524        let file = FileId::new(1);
1525
1526        // Two nodes: one references canonical, one references duplicate.
1527        // After remap, both must land in the canonical bucket of
1528        // `name_index` (merged, deduplicated by NodeId).
1529        let node_canon = graph
1530            .nodes_mut()
1531            .alloc(NodeEntry::new(NodeKind::Function, id_canon, file))
1532            .expect("alloc canon");
1533        let node_dup = graph
1534            .nodes_mut()
1535            .alloc(NodeEntry::new(NodeKind::Function, id_dup, file))
1536            .expect("alloc dup");
1537        graph
1538            .indices_mut()
1539            .add(node_canon, NodeKind::Function, id_canon, None, file);
1540        graph
1541            .indices_mut()
1542            .add(node_dup, NodeKind::Function, id_dup, None, file);
1543
1544        let rebuild = graph.clone_for_rebuild();
1545        let finalized = rebuild.finalize().expect("finalize ok");
1546
1547        let canonical_bucket = finalized.indices().by_name(id_canon);
1548        assert!(canonical_bucket.contains(&node_canon));
1549        assert!(canonical_bucket.contains(&node_dup));
1550        // No duplicates within the merged bucket.
1551        let mut seen = std::collections::HashSet::new();
1552        for id in canonical_bucket {
1553            assert!(seen.insert(*id), "bucket must be dedup'd by NodeId");
1554        }
1555        assert!(
1556            finalized.indices().by_name(id_dup).is_empty(),
1557            "collapsed duplicate key bucket must be removed"
1558        );
1559    }
1560
1561    #[test]
1562    fn step1_remaps_file_registry_source_uri() {
1563        use crate::graph::unified::storage::interner::StringInterner;
1564
1565        let mut graph = CodeGraph::new();
1566        // Force two identical "file:///foo" StringId slots.
1567        let interner: &mut StringInterner = graph.strings_mut();
1568        let start = interner.alloc_range(2).expect("alloc range");
1569        {
1570            let (s, rc) = interner.bulk_slices_mut(start, 2);
1571            s[0] = Some(std::sync::Arc::from("file:///foo"));
1572            s[1] = Some(std::sync::Arc::from("file:///foo"));
1573            rc[0] = 1;
1574            rc[1] = 1;
1575        }
1576        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1577        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1578
1579        let fid = graph
1580            .files_mut()
1581            .register_external_with_uri(
1582                "/virtual/Foo.class",
1583                Some(crate::graph::node::Language::Java),
1584                Some(id_dup),
1585            )
1586            .expect("register external");
1587
1588        let rebuild = graph.clone_for_rebuild();
1589        let finalized = rebuild.finalize().expect("finalize ok");
1590
1591        let view = finalized
1592            .files()
1593            .file_provenance(fid)
1594            .expect("provenance present");
1595        assert_eq!(
1596            view.source_uri,
1597            Some(id_canon),
1598            "FileRegistry source_uri must be rewritten to canonical StringId"
1599        );
1600    }
1601
1602    // ----------------------------------------------------------------
1603    // Iter-4 B1: edge-store StringId remap across delta + CSR tiers.
1604    // ----------------------------------------------------------------
1605
1606    /// Construct a `RebuildGraph` whose interner has three slots
1607    /// containing the same canonical bytes ("symbol_edge_dup"), with
1608    /// two distinct `EdgeKind::Imports { alias }` edges pointing at
1609    /// different duplicate slots. Step 1 must rewrite both aliases
1610    /// through the dedup table so the post-step-9 CSR references only
1611    /// the canonical slot; the duplicate slots must be recycled by
1612    /// `recycle_unreferenced` (ref_count == 0) and the canonical slot
1613    /// must retain references.
1614    ///
1615    /// This test intentionally exercises both the delta-tier write path
1616    /// (`DeltaBuffer::iter_mut`) *during* step 1 (where the edges still
1617    /// live in the delta buffer because no compaction has been run)
1618    /// and the CSR-tier read path (where the edges land after step 9's
1619    /// `swap_csrs_and_clear_deltas`). The companion test
1620    /// `step1_remaps_edge_kind_string_ids_in_csr_tier` covers the
1621    /// symmetric case where edges start in the CSR tier at the time
1622    /// step 1 runs.
1623    #[test]
1624    fn step1_remaps_edge_kind_string_ids_through_dedup_table() {
1625        use crate::graph::unified::edge::EdgeKind;
1626        use crate::graph::unified::storage::interner::StringInterner;
1627
1628        let mut graph = CodeGraph::new();
1629        let file_a = FileId::new(1);
1630        // Force three slots carrying the same canonical bytes so
1631        // `build_dedup_table()` collapses slot[start+1] and slot[start+2]
1632        // onto slot[start].
1633        let interner: &mut StringInterner = graph.strings_mut();
1634        let start = interner.alloc_range(3).expect("alloc range");
1635        {
1636            let (s, rc) = interner.bulk_slices_mut(start, 3);
1637            s[0] = Some(std::sync::Arc::from("symbol_edge_dup"));
1638            s[1] = Some(std::sync::Arc::from("symbol_edge_dup"));
1639            s[2] = Some(std::sync::Arc::from("symbol_edge_dup"));
1640            rc[0] = 1;
1641            rc[1] = 1;
1642            rc[2] = 1;
1643        }
1644        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1645        let id_dup_1 = crate::graph::unified::string::id::StringId::new(start + 1);
1646        let id_dup_2 = crate::graph::unified::string::id::StringId::new(start + 2);
1647
1648        // Allocate two nodes so we can hang two edges off distinct
1649        // (src, tgt) pairs and verify both are remapped.
1650        let src;
1651        let tgt;
1652        {
1653            let arena = graph.nodes_mut();
1654            src = arena
1655                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1656                .expect("alloc src");
1657            tgt = arena
1658                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1659                .expect("alloc tgt");
1660        }
1661        graph
1662            .indices_mut()
1663            .add(src, NodeKind::Function, id_canon, None, file_a);
1664        graph
1665            .indices_mut()
1666            .add(tgt, NodeKind::Function, id_canon, None, file_a);
1667
1668        // Edge 1: Imports { alias = id_dup_1 } — forward src→tgt.
1669        graph.edges_mut().add_edge(
1670            src,
1671            tgt,
1672            EdgeKind::Imports {
1673                alias: Some(id_dup_1),
1674                is_wildcard: false,
1675            },
1676            file_a,
1677        );
1678        // Edge 2: Imports { alias = id_dup_2 } — forward tgt→src (so the
1679        // two edges are distinct keys in the store).
1680        graph.edges_mut().add_edge(
1681            tgt,
1682            src,
1683            EdgeKind::Imports {
1684                alias: Some(id_dup_2),
1685                is_wildcard: true,
1686            },
1687            file_a,
1688        );
1689
1690        // Pre-finalize sanity: the edges live in the delta tier.
1691        assert!(
1692            graph.edges().forward().delta_count() >= 2,
1693            "pre-finalize: forward delta must hold both Imports edges"
1694        );
1695        assert!(
1696            graph.edges().reverse().delta_count() >= 2,
1697            "pre-finalize: reverse delta must hold the mirror of both Imports edges"
1698        );
1699
1700        let rebuild = graph.clone_for_rebuild();
1701        let finalized = rebuild.finalize().expect("finalize ok");
1702
1703        // After finalize step 9, edges have been absorbed into the CSR
1704        // tier by `swap_csrs_and_clear_deltas`. Step 1's `EdgeKind`
1705        // rewrite ran on the delta *before* step 9 snapshotted it, so
1706        // the CSR we assert on is built from the already-remapped delta.
1707        //
1708        // Forward CSR: every `Imports` alias must be canonical.
1709        let forward = finalized.edges().forward();
1710        let fwd_csr = forward
1711            .csr()
1712            .expect("forward CSR must be populated after step 9");
1713        let mut fwd_imports_seen = 0usize;
1714        for kind in fwd_csr.edge_kind() {
1715            if let EdgeKind::Imports { alias, .. } = kind {
1716                fwd_imports_seen += 1;
1717                assert_ne!(
1718                    *alias,
1719                    Some(id_dup_1),
1720                    "forward CSR Imports alias must not reference pre-dedup slot id_dup_1"
1721                );
1722                assert_ne!(
1723                    *alias,
1724                    Some(id_dup_2),
1725                    "forward CSR Imports alias must not reference pre-dedup slot id_dup_2"
1726                );
1727                assert_eq!(
1728                    *alias,
1729                    Some(id_canon),
1730                    "forward CSR Imports alias must be canonicalised"
1731                );
1732            }
1733        }
1734        drop(forward);
1735        assert!(
1736            fwd_imports_seen >= 2,
1737            "forward CSR must hold both Imports edges after finalize (saw {fwd_imports_seen})"
1738        );
1739
1740        // Reverse CSR: mirror edges must be canonicalised identically.
1741        let reverse = finalized.edges().reverse();
1742        let rev_csr = reverse
1743            .csr()
1744            .expect("reverse CSR must be populated after step 9");
1745        let mut rev_imports_seen = 0usize;
1746        for kind in rev_csr.edge_kind() {
1747            if let EdgeKind::Imports { alias, .. } = kind {
1748                rev_imports_seen += 1;
1749                assert_ne!(*alias, Some(id_dup_1));
1750                assert_ne!(*alias, Some(id_dup_2));
1751                assert_eq!(*alias, Some(id_canon));
1752            }
1753        }
1754        drop(reverse);
1755        assert!(
1756            rev_imports_seen >= 2,
1757            "reverse CSR must mirror the forward Imports edges (saw {rev_imports_seen})"
1758        );
1759
1760        // Step-1 (c) invariant: after `recycle_unreferenced`, the
1761        // duplicate slots must report `ref_count == 0`. If step 1's
1762        // edge-store remap had skipped `EdgeKind` payloads, the
1763        // duplicates' refcounts would stay positive (held by the edge
1764        // store) and recycle would not reclaim them.
1765        assert_eq!(
1766            finalized.strings().ref_count(id_dup_1),
1767            0,
1768            "duplicate slot id_dup_1 must be recycled (ref_count == 0) by step 1"
1769        );
1770        assert_eq!(
1771            finalized.strings().ref_count(id_dup_2),
1772            0,
1773            "duplicate slot id_dup_2 must be recycled (ref_count == 0) by step 1"
1774        );
1775        assert!(
1776            finalized.strings().ref_count(id_canon) > 0,
1777            "canonical slot must retain references from the two Imports edges (got {})",
1778            finalized.strings().ref_count(id_canon)
1779        );
1780    }
1781
1782    /// Same construction as above, but compact the edges into a CSR tier
1783    /// before calling finalize so the rewrite covers the steady-state
1784    /// `CsrGraph::edge_kind` path. This exercises the branch of
1785    /// `BidirectionalEdgeStore::rewrite_edge_kind_string_ids_through_remap`
1786    /// that is otherwise dead in the tests above.
1787    #[test]
1788    fn step1_remaps_edge_kind_string_ids_in_csr_tier() {
1789        use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
1790        use crate::graph::unified::edge::EdgeKind;
1791        use crate::graph::unified::storage::interner::StringInterner;
1792
1793        let mut graph = CodeGraph::new();
1794        let file_a = FileId::new(1);
1795        let interner: &mut StringInterner = graph.strings_mut();
1796        let start = interner.alloc_range(2).expect("alloc range");
1797        {
1798            let (s, rc) = interner.bulk_slices_mut(start, 2);
1799            s[0] = Some(std::sync::Arc::from("csr_alias_dup"));
1800            s[1] = Some(std::sync::Arc::from("csr_alias_dup"));
1801            rc[0] = 1;
1802            rc[1] = 1;
1803        }
1804        let id_canon = crate::graph::unified::string::id::StringId::new(start);
1805        let id_dup = crate::graph::unified::string::id::StringId::new(start + 1);
1806
1807        let src;
1808        let tgt;
1809        {
1810            let arena = graph.nodes_mut();
1811            src = arena
1812                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1813                .expect("alloc src");
1814            tgt = arena
1815                .alloc(NodeEntry::new(NodeKind::Function, id_canon, file_a))
1816                .expect("alloc tgt");
1817        }
1818        graph
1819            .indices_mut()
1820            .add(src, NodeKind::Function, id_canon, None, file_a);
1821        graph
1822            .indices_mut()
1823            .add(tgt, NodeKind::Function, id_canon, None, file_a);
1824
1825        // Push the Imports edge into delta with alias pointing at the
1826        // duplicate StringId slot.
1827        graph.edges_mut().add_edge(
1828            src,
1829            tgt,
1830            EdgeKind::Imports {
1831                alias: Some(id_dup),
1832                is_wildcard: false,
1833            },
1834            file_a,
1835        );
1836
1837        // Compact delta → CSR on both directions. This mirrors what the
1838        // full-build pipeline does after Phase 4d plus an explicit
1839        // compaction step.
1840        let node_count = 2;
1841        let edges = graph.edges_mut();
1842        let fwd_snap = snapshot_edges(&edges.forward(), node_count);
1843        let (forward_csr, _) =
1844            build_compacted_csr(&fwd_snap, Direction::Forward).expect("forward csr");
1845        let rev_snap = snapshot_edges(&edges.reverse(), node_count);
1846        let (reverse_csr, _) =
1847            build_compacted_csr(&rev_snap, Direction::Reverse).expect("reverse csr");
1848        edges.swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
1849        assert!(
1850            edges.forward().csr().is_some(),
1851            "forward CSR must be present"
1852        );
1853        assert!(
1854            edges.reverse().csr().is_some(),
1855            "reverse CSR must be present"
1856        );
1857        assert_eq!(edges.forward().delta_count(), 0, "delta must be empty");
1858        assert_eq!(edges.reverse().delta_count(), 0, "delta must be empty");
1859
1860        let rebuild = graph.clone_for_rebuild();
1861        let finalized = rebuild.finalize().expect("finalize ok");
1862
1863        // CSR tier: every `edge_kind` entry must reference the canonical
1864        // StringId, never the duplicate.
1865        let forward = finalized.edges().forward();
1866        let csr = forward
1867            .csr()
1868            .expect("forward CSR must survive finalize step 1");
1869        let mut imports_seen = 0usize;
1870        for kind in csr.edge_kind() {
1871            if let EdgeKind::Imports { alias, .. } = kind {
1872                imports_seen += 1;
1873                assert_ne!(
1874                    *alias,
1875                    Some(id_dup),
1876                    "CSR Imports alias must not reference pre-dedup duplicate"
1877                );
1878                assert_eq!(
1879                    *alias,
1880                    Some(id_canon),
1881                    "CSR Imports alias must be canonicalised"
1882                );
1883            }
1884        }
1885        assert!(
1886            imports_seen > 0,
1887            "forward CSR must hold at least one Imports edge"
1888        );
1889        drop(forward);
1890
1891        // Reverse CSR — same guarantee.
1892        let reverse = finalized.edges().reverse();
1893        let rev_csr = reverse
1894            .csr()
1895            .expect("reverse CSR must survive finalize step 1");
1896        let mut rev_imports_seen = 0usize;
1897        for kind in rev_csr.edge_kind() {
1898            if let EdgeKind::Imports { alias, .. } = kind {
1899                rev_imports_seen += 1;
1900                assert_ne!(*alias, Some(id_dup));
1901                assert_eq!(*alias, Some(id_canon));
1902            }
1903        }
1904        assert!(rev_imports_seen > 0, "reverse CSR must hold Imports edges");
1905        drop(reverse);
1906
1907        assert_eq!(
1908            finalized.strings().ref_count(id_dup),
1909            0,
1910            "duplicate slot must be recycled (ref_count == 0) by step 1"
1911        );
1912    }
1913
1914    /// Exhaustive coverage: every `StringId`-holding surface *reachable
1915    /// through `CodeGraph`'s public mutation API* must be rewritten by
1916    /// finalize step 1. This test drives each surface with a distinct
1917    /// duplicate `StringId` slot and verifies after finalize that (i)
1918    /// every surface references the canonical slot, never the duplicate;
1919    /// (ii) every duplicate slot is recycled (`ref_count == 0`); (iii)
1920    /// the canonical slot retains a positive ref count.
1921    ///
1922    /// Surfaces exercised:
1923    /// * **Arena** (`NodeEntry.name`, `NodeEntry.qualified_name`)
1924    /// * **AuxiliaryIndices** (`name_index` / `qualified_name_index` keys)
1925    /// * **FileRegistry** (`FileEntry.source_uri`)
1926    /// * **BidirectionalEdgeStore** (`EdgeKind::Imports.alias` — delta tier)
1927    ///
1928    /// `AliasTable` and `ShadowTable` StringId fields live under private
1929    /// API (`pub(crate) fn set_alias_table`, no public entry mutators)
1930    /// and are already covered by the iter-2 `step1_remaps_*` tests above
1931    /// that exercise their `rewrite_string_ids_through_remap` helpers
1932    /// end-to-end through the binding-plane derivation path. Covering
1933    /// them here would require either exposing additional test-only APIs
1934    /// or duplicating the iter-2 coverage; neither adds assurance beyond
1935    /// what the dedicated iter-2 tests already provide.
1936    #[test]
1937    fn step1_remaps_all_stringid_holders_exhaustively() {
1938        use crate::graph::unified::edge::EdgeKind;
1939        use crate::graph::unified::storage::interner::StringInterner;
1940
1941        let mut graph = CodeGraph::new();
1942        let file_a = FileId::new(1);
1943        let interner: &mut StringInterner = graph.strings_mut();
1944        // 10 slots = 5 surfaces × 2 (canon + dup) interleaved.
1945        let start = interner.alloc_range(10).expect("alloc range");
1946        {
1947            let (s, rc) = interner.bulk_slices_mut(start, 10);
1948            for (i, label) in [
1949                "arena_name",
1950                "arena_name",
1951                "arena_qname",
1952                "arena_qname",
1953                "idx_key",
1954                "idx_key",
1955                "edge_alias",
1956                "edge_alias",
1957                "file_uri",
1958                "file_uri",
1959            ]
1960            .iter()
1961            .enumerate()
1962            {
1963                s[i] = Some(std::sync::Arc::from(*label));
1964                rc[i] = 1;
1965            }
1966        }
1967        let sid = |off: u32| crate::graph::unified::string::id::StringId::new(start + off);
1968        let arena_name_canon = sid(0);
1969        let arena_name_dup = sid(1);
1970        let arena_qname_canon = sid(2);
1971        let arena_qname_dup = sid(3);
1972        let idx_key_canon = sid(4);
1973        let idx_key_dup = sid(5);
1974        let edge_alias_canon = sid(6);
1975        let edge_alias_dup = sid(7);
1976        let file_uri_canon = sid(8);
1977        let file_uri_dup = sid(9);
1978
1979        // (a) Arena — name + qualified_name reference duplicate slots.
1980        let node;
1981        let node2;
1982        {
1983            let arena = graph.nodes_mut();
1984            let mut entry = NodeEntry::new(NodeKind::Function, arena_name_dup, file_a);
1985            entry.qualified_name = Some(arena_qname_dup);
1986            node = arena.alloc(entry).expect("alloc arena node");
1987            node2 = arena
1988                .alloc(NodeEntry::new(NodeKind::Function, arena_name_dup, file_a))
1989                .expect("alloc arena node2");
1990        }
1991        // (b) Indices — key under the duplicate StringId.
1992        graph
1993            .indices_mut()
1994            .add(node, NodeKind::Function, idx_key_dup, None, file_a);
1995        graph
1996            .indices_mut()
1997            .add(node2, NodeKind::Function, idx_key_dup, None, file_a);
1998        // (c) File registry — register a file with source_uri = dup.
1999        let fid = graph
2000            .files_mut()
2001            .register_external_with_uri(
2002                "/virtual/Exhaustive.class",
2003                Some(crate::graph::node::Language::Java),
2004                Some(file_uri_dup),
2005            )
2006            .expect("register external");
2007        // (d) Edge store — Imports edge with alias = dup.
2008        graph.edges_mut().add_edge(
2009            node,
2010            node2,
2011            EdgeKind::Imports {
2012                alias: Some(edge_alias_dup),
2013                is_wildcard: false,
2014            },
2015            file_a,
2016        );
2017
2018        let rebuild = graph.clone_for_rebuild();
2019        let finalized = rebuild.finalize().expect("finalize ok");
2020
2021        // (a) Arena: name + qualified_name on every live node.
2022        for nid in [node, node2] {
2023            let entry = finalized.nodes().get(nid).expect("node survives");
2024            assert_eq!(entry.name, arena_name_canon, "arena name canonicalised");
2025            if let Some(q) = entry.qualified_name {
2026                assert_eq!(q, arena_qname_canon, "arena qname canonicalised");
2027            }
2028        }
2029        // (b) Indices: canonical bucket populated, duplicate bucket empty.
2030        assert!(finalized.indices().by_name(idx_key_dup).is_empty());
2031        assert!(!finalized.indices().by_name(idx_key_canon).is_empty());
2032        // (c) File registry.
2033        let view = finalized
2034            .files()
2035            .file_provenance(fid)
2036            .expect("provenance present");
2037        assert_eq!(view.source_uri, Some(file_uri_canon));
2038        // (d) Edge store: every Imports alias canonical on both
2039        // directions. Post-finalize, edges live in CSR (step 9 drained
2040        // the delta into the CSR on both directions). Step 1's remap
2041        // ran on the delta *before* step 9, so the CSR is built from
2042        // the already-remapped delta.
2043        let fwd = finalized.edges().forward();
2044        let fwd_csr = fwd
2045            .csr()
2046            .expect("forward CSR must be populated after step 9");
2047        let mut fwd_imports = 0usize;
2048        for kind in fwd_csr.edge_kind() {
2049            if let EdgeKind::Imports { alias, .. } = kind {
2050                fwd_imports += 1;
2051                assert_ne!(*alias, Some(edge_alias_dup));
2052                assert_eq!(*alias, Some(edge_alias_canon));
2053            }
2054        }
2055        drop(fwd);
2056        assert!(fwd_imports > 0, "forward Imports must be present in CSR");
2057        let rev = finalized.edges().reverse();
2058        let rev_csr = rev
2059            .csr()
2060            .expect("reverse CSR must be populated after step 9");
2061        let mut rev_imports = 0usize;
2062        for kind in rev_csr.edge_kind() {
2063            if let EdgeKind::Imports { alias, .. } = kind {
2064                rev_imports += 1;
2065                assert_ne!(*alias, Some(edge_alias_dup));
2066                assert_eq!(*alias, Some(edge_alias_canon));
2067            }
2068        }
2069        drop(rev);
2070        assert!(rev_imports > 0, "reverse Imports must be present in CSR");
2071
2072        // All duplicate slots recycled; canonical slots retained.
2073        for (dup, canon, label) in [
2074            (arena_name_dup, arena_name_canon, "arena_name"),
2075            (arena_qname_dup, arena_qname_canon, "arena_qname"),
2076            (idx_key_dup, idx_key_canon, "idx_key"),
2077            (edge_alias_dup, edge_alias_canon, "edge_alias"),
2078            (file_uri_dup, file_uri_canon, "file_uri"),
2079        ] {
2080            assert_eq!(
2081                finalized.strings().ref_count(dup),
2082                0,
2083                "{label}: duplicate slot must be recycled (ref_count == 0)"
2084            );
2085            assert!(
2086                finalized.strings().ref_count(canon) > 0,
2087                "{label}: canonical slot must retain references (got {})",
2088                finalized.strings().ref_count(canon)
2089            );
2090        }
2091    }
2092
2093    // ----------------------------------------------------------------
2094    // Iter-2 B2: step 6 + step 13 per_file_nodes / bucket bijection.
2095    // ----------------------------------------------------------------
2096
2097    #[test]
2098    fn finalize_step6_drops_tombstoned_nodes_from_buckets() {
2099        let (graph, a, b, c) = seeded_graph();
2100        // Seed the registry buckets so finalize step 6 has real work.
2101        let file_a = FileId::new(1);
2102        let file_b = FileId::new(2);
2103        {
2104            // Direct access via clone-for-rebuild — we need to populate
2105            // the REBUILD-local registry because `seeded_graph` doesn't.
2106        }
2107        // Populate buckets on the graph first so clone captures them.
2108        {
2109            let files = graph.files();
2110            let _ = files; // ensure immut ref scope
2111        }
2112        let mut graph = graph;
2113        graph.files_mut().record_node(file_a, a);
2114        graph.files_mut().record_node(file_a, b);
2115        graph.files_mut().record_node(file_b, c);
2116
2117        let mut rebuild = graph.clone_for_rebuild();
2118        rebuild.tombstone(a);
2119        rebuild.tombstone(c);
2120        let finalized = rebuild.finalize().expect("finalize ok");
2121
2122        // a and c were tombstoned — must be absent from any bucket.
2123        // b survives under file_a; file_b's bucket collapsed to empty
2124        // and must be dropped.
2125        let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2126            finalized.files().per_file_nodes_for_gate0d().collect();
2127        assert_eq!(buckets.len(), 1, "empty bucket must be dropped");
2128        assert_eq!(buckets.get(&file_a).cloned().unwrap_or_default(), vec![b]);
2129        assert!(!buckets.contains_key(&file_b));
2130    }
2131
2132    #[test]
2133    fn finalize_step6_dedups_within_bucket() {
2134        let (mut graph, a, b, c) = seeded_graph();
2135        let file_a = FileId::new(1);
2136        let file_b = FileId::new(2);
2137        // Bucket every live node consistently with seeded_graph's own
2138        // FileId assignment (so the non-vacuous bijection check passes),
2139        // but duplicate `a` once to exercise step 6's dedup path.
2140        graph.files_mut().record_node(file_a, a);
2141        graph.files_mut().record_node(file_a, a); // intentional duplicate
2142        graph.files_mut().record_node(file_a, b);
2143        graph.files_mut().record_node(file_b, c);
2144        let rebuild = graph.clone_for_rebuild();
2145        let finalized = rebuild.finalize().expect("finalize ok");
2146        let buckets: std::collections::BTreeMap<FileId, Vec<crate::graph::unified::node::NodeId>> =
2147            finalized.files().per_file_nodes_for_gate0d().collect();
2148        let bucket_a = buckets.get(&file_a).expect("bucket for file_a");
2149        // Two live nodes (a, b) in file_a, duplicate `a` collapsed.
2150        assert_eq!(
2151            bucket_a.len(),
2152            2,
2153            "duplicates within bucket must be dedup'd"
2154        );
2155        assert!(bucket_a.contains(&a));
2156        assert!(bucket_a.contains(&b));
2157    }
2158
2159    #[test]
2160    fn finalize_step6_drops_empty_buckets() {
2161        let (mut graph, a, _b, _c) = seeded_graph();
2162        let file = FileId::new(1);
2163        graph.files_mut().record_node(file, a);
2164        let mut rebuild = graph.clone_for_rebuild();
2165        rebuild.tombstone(a); // drops the only bucket member
2166        let finalized = rebuild.finalize().expect("finalize ok");
2167        assert_eq!(
2168            finalized.files().per_file_bucket_count(),
2169            0,
2170            "empty bucket must be dropped by step 6"
2171        );
2172    }
2173
2174    #[test]
2175    fn bucket_bijection_passes_when_every_live_node_is_bucketed() {
2176        let (mut graph, a, b, c) = seeded_graph();
2177        // Bucket every live node consistently with each node's FileId.
2178        // seeded_graph uses file_a=1 for a, b and file_b=2 for c.
2179        let file_a = FileId::new(1);
2180        let file_b = FileId::new(2);
2181        graph.files_mut().record_node(file_a, a);
2182        graph.files_mut().record_node(file_a, b);
2183        graph.files_mut().record_node(file_b, c);
2184
2185        // Round-trip through finalize so the bijection check runs.
2186        let rebuild = graph.clone_for_rebuild();
2187        let finalized = rebuild.finalize().expect("finalize ok");
2188        // Explicit assert_bucket_bijection call also passes.
2189        finalized.assert_bucket_bijection();
2190    }
2191
2192    #[test]
2193    #[should_panic(expected = "duplicate node")]
2194    fn bucket_bijection_detects_duplicate_within_bucket() {
2195        let (mut graph, a, _b, _c) = seeded_graph();
2196        let file_a = FileId::new(1);
2197        // Manually force a duplicate inside a bucket by bypassing
2198        // `retain_nodes_in_buckets`. We achieve this by calling
2199        // `record_node` twice on the live graph and then invoking the
2200        // bijection check directly *without* routing through finalize
2201        // (which would dedup in step 6).
2202        graph.files_mut().record_node(file_a, a);
2203        graph.files_mut().record_node(file_a, a);
2204        graph.assert_bucket_bijection();
2205    }
2206
2207    #[test]
2208    #[should_panic(expected = "misfiled")]
2209    fn bucket_bijection_detects_misfiled_node() {
2210        let (mut graph, a, _b, _c) = seeded_graph();
2211        // Node `a` was allocated against FileId(1); put it in a bucket
2212        // keyed by FileId(99) — the bijection must reject with the
2213        // precise "misfiled" panic message emitted by
2214        // `CodeGraph::assert_bucket_bijection` (concurrent/graph.rs
2215        // around line 865). A broader `expected = "node"` would also
2216        // match the duplicate-node / dead-node / multi-bucket arms, so
2217        // it would not discriminate the specific failure mode under test.
2218        graph.files_mut().record_node(FileId::new(99), a);
2219        graph.assert_bucket_bijection();
2220    }
2221
2222    #[test]
2223    #[should_panic(expected = "absent from all buckets")]
2224    fn bucket_bijection_detects_missing_live_node() {
2225        let (mut graph, a, _b, c) = seeded_graph();
2226        // Bucket one node but not the other; with at least one populated
2227        // bucket, condition (d) becomes strict and the missing live node
2228        // must trigger a panic.
2229        let file_a = FileId::new(1);
2230        let _ = c;
2231        graph.files_mut().record_node(file_a, a);
2232        graph.assert_bucket_bijection();
2233    }
2234
2235    #[test]
2236    #[should_panic(expected = "dead node")]
2237    fn bucket_bijection_detects_dead_node_in_bucket() {
2238        let (mut graph, a, _b, _c) = seeded_graph();
2239        // Tombstone a, then leave it in a bucket — bijection must reject.
2240        let file_a = FileId::new(1);
2241        graph.files_mut().record_node(file_a, a);
2242        graph.nodes_mut().remove(a);
2243        graph.assert_bucket_bijection();
2244    }
2245
2246    // -----------------------------------------------------------------
2247    // Gate 0d — Tombstone-residue negative tests.
2248    //
2249    // Covers both the `RebuildGraph::assert_no_tombstone_residue`
2250    // diagnostic helper and the finalize step-14 publish-boundary
2251    // check. The bijection counterparts live in the block immediately
2252    // above.
2253    // -----------------------------------------------------------------
2254
2255    #[test]
2256    #[should_panic(expected = "still in edge store")]
2257    fn rebuild_graph_residue_detects_tombstone_still_in_edge_store() {
2258        // Gate 0d iter-1 Minor — dedicated negative test for the
2259        // edge-store residue arm.
2260        //
2261        // The residue check iterates K-rows in order (see
2262        // `assert_no_tombstone_residue` above): NodeArena → edges →
2263        // AuxiliaryIndices → macro_metadata → ... — so to exercise the
2264        // `edges` arm specifically we must remove `a` from the
2265        // NodeArena first (so the NodeArena arm does not fire), leave
2266        // the edge that references `a` intact, and tombstone it.
2267        use crate::graph::unified::edge::kind::EdgeKind;
2268        #[cfg(test)]
2269        use crate::graph::unified::edge::kind::ResolvedVia;
2270        let (graph, a, b, _c) = seeded_graph();
2271        let mut rebuild = graph.clone_for_rebuild();
2272        // Seed the edge store with an edge that references `a`. This
2273        // becomes the "dangling reference" the residue check must
2274        // detect: `a` will be removed from the arena but the edge
2275        // keeps pointing at it.
2276        rebuild.edges.add_edge(
2277            a,
2278            b,
2279            EdgeKind::Calls {
2280                argument_count: 0,
2281                is_async: false,
2282                resolved_via: ResolvedVia::Direct,
2283            },
2284            FileId::new(1),
2285        );
2286        // Remove `a` from the arena so the K.A1 arm passes.
2287        rebuild.nodes.remove(a);
2288        rebuild.tombstone(a);
2289        rebuild.assert_no_tombstone_residue();
2290    }
2291
2292    #[test]
2293    #[should_panic(expected = "still in auxiliary indices")]
2294    fn rebuild_graph_residue_detects_tombstone_still_in_auxiliary_indices() {
2295        // The residue check iterates K-rows starting at `NodeArena`, so
2296        // to exercise the `AuxiliaryIndices` arm specifically we must
2297        // first remove `a` from the arena (simulating step 2 of
2298        // finalize), leaving the `AuxiliaryIndices` stale reference
2299        // as the first arm that can trip. This reproduces the exact
2300        // failure mode the pre-finalize helper exists to catch: a bug
2301        // where finalize compacts the arena but forgets one index.
2302        let (graph, a, _b, _c) = seeded_graph();
2303        let mut rebuild = graph.clone_for_rebuild();
2304        rebuild.nodes.remove(a);
2305        rebuild.tombstone(a);
2306        rebuild.assert_no_tombstone_residue();
2307    }
2308
2309    #[test]
2310    #[should_panic(expected = "still in NodeArena")]
2311    fn rebuild_graph_residue_detects_tombstone_still_in_node_arena() {
2312        // NodeArena is the first `NodeIdBearing` in the residue check's
2313        // iteration order, so we can trip it with a raw `tombstone(a)`
2314        // call before we touch any auxiliary index. The live arena
2315        // entry for `a` remains — that is the bug we want to surface.
2316        let (graph, a, _b, _c) = seeded_graph();
2317        let mut rebuild = graph.clone_for_rebuild();
2318        // Prove the arena still contains `a` before the assertion so
2319        // the panic expectation maps to a real condition, not a vacuous
2320        // empty-tombstone skip.
2321        assert!(
2322            rebuild.nodes.get(a).is_some(),
2323            "pre-condition: arena must still contain the staged tombstone"
2324        );
2325        rebuild.tombstone(a);
2326        rebuild.assert_no_tombstone_residue();
2327    }
2328
2329    #[test]
2330    fn rebuild_graph_residue_is_noop_on_empty_tombstone_set() {
2331        // Positive: with no tombstones staged, the assertion must be a
2332        // no-op — otherwise every fresh `clone_for_rebuild` would panic.
2333        let (graph, _a, _b, _c) = seeded_graph();
2334        let rebuild = graph.clone_for_rebuild();
2335        assert_eq!(rebuild.pending_tombstone_count(), 0);
2336        rebuild.assert_no_tombstone_residue();
2337    }
2338
2339    #[test]
2340    #[should_panic(expected = "still in NodeArena")]
2341    fn finalize_step14_residue_detects_live_reference_to_drained_node() {
2342        // Drive the `finalize` step-14 residue assertion through the
2343        // publish-boundary helper. `seeded_graph()` returns a graph in
2344        // which `a` is a live NodeArena entry; passing `a` in the
2345        // `drained` set without actually removing the arena slot
2346        // simulates the bug step 14 exists to catch — step 8 drained
2347        // `a` into `drained_tombstones`, but something failed to
2348        // compact the arena. The residue check's K-row iteration
2349        // starts at `NodeArena`, so that is the arm that fires.
2350        //
2351        // The test routes through the canonical
2352        // `publish::assert_publish_invariants` helper so the test
2353        // exercises the exact code path `finalize` step 14 executes,
2354        // not just the underlying `assert_no_tombstone_residue_for`
2355        // entry point.
2356        let (graph, a, _b, _c) = seeded_graph();
2357        let mut drained: ::std::collections::HashSet<NodeId> = ::std::collections::HashSet::new();
2358        drained.insert(a);
2359        crate::graph::unified::publish::assert_publish_invariants(&graph, &drained);
2360    }
2361
2362    #[test]
2363    fn finalize_with_empty_drained_set_passes_publish_invariants() {
2364        // Positive smoke test: the happy path through finalize (no
2365        // tombstones staged) must pass `assert_publish_invariants`
2366        // unconditionally on every build profile. This is the case the
2367        // full-rebuild `build_unified_graph_inner` end-of-function call
2368        // exercises on every CI run.
2369        let (graph, _a, _b, _c) = seeded_graph();
2370        let file_a = FileId::new(1);
2371        let file_b = FileId::new(2);
2372        let mut graph = graph;
2373        graph.files_mut().record_node(file_a, _a);
2374        graph.files_mut().record_node(file_a, _b);
2375        graph.files_mut().record_node(file_b, _c);
2376
2377        let rebuild = graph.clone_for_rebuild();
2378        let finalized = rebuild.finalize().expect("finalize ok");
2379        crate::graph::unified::publish::assert_publish_invariants(
2380            &finalized,
2381            &::std::collections::HashSet::new(),
2382        );
2383    }
2384
2385    // ---- Task 4 Step 3 — RebuildGraph::remove_file ------------------
2386
2387    /// Seed a graph + rebuild with 2 files × `per_file` nodes and a
2388    /// mix of intra- and cross-file `Calls` edges, then clone for
2389    /// rebuild. Returns `(rebuild, file_a, file_b, file_a_nodes,
2390    /// file_b_nodes)` — mirrors `seed_two_file_graph` in the
2391    /// `concurrent::graph::tests` module but yields the rebuild value
2392    /// so tests here can drive `RebuildGraph::remove_file` directly.
2393    fn seed_two_file_rebuild(
2394        per_file: usize,
2395    ) -> (
2396        RebuildGraph,
2397        crate::graph::unified::file::FileId,
2398        crate::graph::unified::file::FileId,
2399        Vec<NodeId>,
2400        Vec<NodeId>,
2401    ) {
2402        use crate::graph::unified::edge::{EdgeKind, ResolvedVia};
2403        use crate::graph::unified::node::NodeKind;
2404        use crate::graph::unified::storage::arena::NodeEntry;
2405        use std::path::Path;
2406
2407        let mut graph = CodeGraph::new();
2408        let sym = graph.strings_mut().intern("sym").expect("intern");
2409        let file_a = graph
2410            .files_mut()
2411            .register(Path::new("/tmp/rebuild_remove_file_test/a.rs"))
2412            .expect("register a");
2413        let file_b = graph
2414            .files_mut()
2415            .register(Path::new("/tmp/rebuild_remove_file_test/b.rs"))
2416            .expect("register b");
2417
2418        let mut file_a_nodes = Vec::with_capacity(per_file);
2419        let mut file_b_nodes = Vec::with_capacity(per_file);
2420        for _ in 0..per_file {
2421            let n = graph
2422                .nodes_mut()
2423                .alloc(NodeEntry::new(NodeKind::Function, sym, file_a))
2424                .expect("alloc a");
2425            file_a_nodes.push(n);
2426            graph.files_mut().record_node(file_a, n);
2427            graph
2428                .indices_mut()
2429                .add(n, NodeKind::Function, sym, None, file_a);
2430        }
2431        for _ in 0..per_file {
2432            let n = graph
2433                .nodes_mut()
2434                .alloc(NodeEntry::new(NodeKind::Function, sym, file_b))
2435                .expect("alloc b");
2436            file_b_nodes.push(n);
2437            graph.files_mut().record_node(file_b, n);
2438            graph
2439                .indices_mut()
2440                .add(n, NodeKind::Function, sym, None, file_b);
2441        }
2442        for i in 0..per_file.saturating_sub(1) {
2443            graph.edges_mut().add_edge(
2444                file_a_nodes[i],
2445                file_a_nodes[i + 1],
2446                EdgeKind::Calls {
2447                    argument_count: 0,
2448                    is_async: false,
2449                    resolved_via: ResolvedVia::Direct,
2450                },
2451                file_a,
2452            );
2453            graph.edges_mut().add_edge(
2454                file_b_nodes[i],
2455                file_b_nodes[i + 1],
2456                EdgeKind::Calls {
2457                    argument_count: 0,
2458                    is_async: false,
2459                    resolved_via: ResolvedVia::Direct,
2460                },
2461                file_b,
2462            );
2463        }
2464        graph.edges_mut().add_edge(
2465            file_a_nodes[0],
2466            file_b_nodes[0],
2467            EdgeKind::Calls {
2468                argument_count: 0,
2469                is_async: false,
2470                resolved_via: ResolvedVia::Direct,
2471            },
2472            file_a,
2473        );
2474        graph.edges_mut().add_edge(
2475            file_b_nodes[0],
2476            file_a_nodes[0],
2477            EdgeKind::Calls {
2478                argument_count: 0,
2479                is_async: false,
2480                resolved_via: ResolvedVia::Direct,
2481            },
2482            file_b,
2483        );
2484
2485        let rebuild = graph.clone_for_rebuild();
2486        (rebuild, file_a, file_b, file_a_nodes, file_b_nodes)
2487    }
2488
2489    #[test]
2490    fn rebuild_remove_file_tombstones_all_per_file_nodes() {
2491        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2492
2493        let returned = rebuild.remove_file(file_a);
2494
2495        let returned_set: std::collections::HashSet<NodeId> = returned.iter().copied().collect();
2496        let expected_set: std::collections::HashSet<NodeId> =
2497            file_a_nodes.iter().copied().collect();
2498        assert_eq!(
2499            returned_set, expected_set,
2500            "remove_file must return exactly the file_a nodes drained from the bucket"
2501        );
2502        // Each returned NodeId is arena-gone on the rebuild.
2503        for nid in &file_a_nodes {
2504            assert!(
2505                rebuild.nodes.get(*nid).is_none(),
2506                "node {nid:?} from removed file must be tombstoned on rebuild arena"
2507            );
2508        }
2509        // And staged for the finalize-time NodeIdBearing sweep.
2510        assert_eq!(rebuild.pending_tombstone_count(), file_a_nodes.len());
2511    }
2512
2513    #[test]
2514    fn rebuild_remove_file_invalidates_all_edges_sourced_or_targeted_at_removed_nodes() {
2515        let (mut rebuild, file_a, _file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(3);
2516
2517        // Seed produces 6 forward delta edges (2 intra-A + 2 intra-B
2518        // + 2 cross). The rebuild's own forward/reverse edge stores
2519        // mirror this.
2520        assert_eq!(rebuild.edges.forward().delta().len(), 6);
2521
2522        let _ = rebuild.remove_file(file_a);
2523
2524        // After removal: only 2 intra-B edges remain in each direction.
2525        assert_eq!(rebuild.edges.forward().delta().len(), 2);
2526        assert_eq!(rebuild.edges.reverse().delta().len(), 2);
2527
2528        // Cross-file edge b[0] -> a[0] must be gone from any direction.
2529        let b0 = file_b_nodes[0];
2530        let a0 = file_a_nodes[0];
2531        let from_b0: Vec<_> = rebuild
2532            .edges
2533            .edges_from(b0)
2534            .into_iter()
2535            .filter(|e| e.target == a0)
2536            .collect();
2537        assert!(
2538            from_b0.is_empty(),
2539            "edge b0 -> a0 must be gone after rebuild.remove_file(file_a)"
2540        );
2541    }
2542
2543    #[test]
2544    fn rebuild_remove_file_drops_file_registry_entry() {
2545        let (mut rebuild, file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2546
2547        assert!(rebuild.files.resolve(file_a).is_some());
2548        assert!(!rebuild.files.nodes_for_file(file_a).is_empty());
2549
2550        let _ = rebuild.remove_file(file_a);
2551
2552        assert!(
2553            rebuild.files.resolve(file_a).is_none(),
2554            "rebuild FileRegistry entry must be gone"
2555        );
2556        assert!(
2557            rebuild.files.nodes_for_file(file_a).is_empty(),
2558            "rebuild per-file bucket for file_a must be drained"
2559        );
2560    }
2561
2562    #[test]
2563    fn rebuild_remove_file_is_idempotent_on_unknown_file() {
2564        use crate::graph::unified::file::FileId;
2565        let (mut rebuild, _file_a, _file_b, _, _) = seed_two_file_rebuild(2);
2566
2567        let nodes_before = rebuild.nodes.len();
2568        let delta_fwd_before = rebuild.edges.forward().delta().len();
2569        let delta_rev_before = rebuild.edges.reverse().delta().len();
2570        let tombstones_before = rebuild.pending_tombstone_count();
2571
2572        let bogus = FileId::new(9999);
2573        let returned = rebuild.remove_file(bogus);
2574        assert!(returned.is_empty());
2575
2576        assert_eq!(rebuild.nodes.len(), nodes_before);
2577        assert_eq!(rebuild.edges.forward().delta().len(), delta_fwd_before);
2578        assert_eq!(rebuild.edges.reverse().delta().len(), delta_rev_before);
2579        assert_eq!(rebuild.pending_tombstone_count(), tombstones_before);
2580    }
2581
2582    #[test]
2583    fn rebuild_remove_file_stages_tombstones_for_finalize_sweep() {
2584        // The whole point of RebuildGraph::remove_file deferring the
2585        // K.A/K.B sweep to finalize() is that the sweep happens exactly
2586        // once against the union of every file's tombstones. Drive this
2587        // end-to-end: remove both files, finalize, and assert the
2588        // publish-boundary invariants hold.
2589        let (mut rebuild, file_a, file_b, file_a_nodes, file_b_nodes) = seed_two_file_rebuild(2);
2590
2591        let _ = rebuild.remove_file(file_a);
2592        let _ = rebuild.remove_file(file_b);
2593
2594        // Pending tombstones: union of both files' nodes.
2595        assert_eq!(
2596            rebuild.pending_tombstone_count(),
2597            file_a_nodes.len() + file_b_nodes.len()
2598        );
2599
2600        // Pre-finalize residue check against the rebuild-local state
2601        // is expected to pass: every NodeIdBearing surface on the
2602        // rebuild must already be clean of the tombstoned IDs (via the
2603        // immediate arena + edge tombstoning in step 1–2 of
2604        // remove_file) — NO, the NodeIdBearing K.A/K.B surfaces beyond
2605        // NodeArena/edges are NOT touched before finalize. The
2606        // assert_no_tombstone_residue helper on RebuildGraph walks
2607        // every surface, so it will legitimately find tombstones in
2608        // the auxiliary indices + metadata stores until finalize runs.
2609        //
2610        // So: do NOT run the pre-finalize residue check here. Instead,
2611        // confirm finalize runs successfully and the assembled
2612        // CodeGraph passes the publish-boundary invariants against
2613        // the drained set — that is the real post-condition.
2614        let finalized = rebuild.finalize().expect("finalize must succeed");
2615
2616        // Every surface on the finalized CodeGraph must be clean of the
2617        // tombstoned ids. Use the publish-boundary residue helper
2618        // against an empty dead set (finalize's own step-14 call already
2619        // covered the drained set; we re-verify the bijection for
2620        // paranoia).
2621        crate::graph::unified::publish::assert_publish_bijection(&finalized);
2622        // Arena is empty (every seeded node was tombstoned).
2623        assert_eq!(finalized.nodes().len(), 0);
2624        // No file registration survives.
2625        assert!(finalized.files().resolve(file_a).is_none());
2626        assert!(finalized.files().resolve(file_b).is_none());
2627    }
2628
2629    #[test]
2630    fn rebuild_remove_file_repeated_calls_are_idempotent() {
2631        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2632
2633        let first = rebuild.remove_file(file_a);
2634        assert_eq!(first.len(), file_a_nodes.len());
2635        let staged = rebuild.pending_tombstone_count();
2636
2637        // Second call: bucket is already drained, NodeArena::remove
2638        // ignores stale generations, and the tombstones set should not
2639        // grow. The immediate tombstone side effects are idempotent.
2640        let second = rebuild.remove_file(file_a);
2641        assert!(second.is_empty());
2642        assert_eq!(rebuild.pending_tombstone_count(), staged);
2643    }
2644
2645    #[test]
2646    fn rebuild_remove_file_clears_file_segments_entry() {
2647        // Iter-1 Codex review fix mirror of the CodeGraph-side test
2648        // in `concurrent/graph.rs`. Seed a segment for file A, invoke
2649        // `RebuildGraph::remove_file`, and assert both the
2650        // rebuild-local `file_segments` and the finalized `CodeGraph`'s
2651        // `file_segments` table carry no entry for file A. The second
2652        // half of the check is critical: `finalize()` step 12 publishes
2653        // `self.file_segments` verbatim, so a missing clear here would
2654        // leak the stale range into the publishable graph.
2655        use std::path::Path;
2656
2657        let (mut rebuild, file_a, _file_b, file_a_nodes, _) = seed_two_file_rebuild(3);
2658
2659        // Seed a segment for file A. Fish out the span from the
2660        // allocated NodeIds to mirror the shape `phase3_parallel_commit`
2661        // produces.
2662        let first_index = file_a_nodes
2663            .iter()
2664            .map(|n| n.index())
2665            .min()
2666            .expect("per_file = 3");
2667        let last_index = file_a_nodes
2668            .iter()
2669            .map(|n| n.index())
2670            .max()
2671            .expect("per_file = 3");
2672        let slot_count = last_index - first_index + 1;
2673        rebuild
2674            .file_segments
2675            .record_range(file_a, first_index, slot_count);
2676        assert!(
2677            rebuild.file_segments().get(file_a).is_some(),
2678            "seeded segment for file_a must be present before remove_file"
2679        );
2680
2681        // Act.
2682        let _ = rebuild.remove_file(file_a);
2683
2684        // Rebuild-local post-condition.
2685        assert!(
2686            rebuild.file_segments().get(file_a).is_none(),
2687            "RebuildGraph::remove_file must clear the file_segments entry"
2688        );
2689
2690        // Finalize post-condition: the published CodeGraph must also
2691        // carry no entry for file_a.
2692        let finalized = rebuild.finalize().expect("finalize must succeed");
2693        assert!(
2694            finalized.file_segments().get(file_a).is_none(),
2695            "finalize must publish a CodeGraph with no stale file_segments for file_a"
2696        );
2697
2698        // Defensive suppression: the Path import is used only by the
2699        // adjacent `seed_two_file_rebuild` helper when the signature
2700        // is exercised — the let-binding below keeps this test
2701        // self-contained even if the helper's surface changes later.
2702        let _ = Path::new("");
2703    }
2704}