sqry_core/graph/unified/build/parallel_commit.rs
//! Parallel commit pipeline for pre-allocated ID ranges.
//!
//! Replaces the serial commit loop with a four-phase pipeline:
//! Phase 2: Count + range assignment via prefix sums
//! Phase 3: Parallel commit into disjoint pre-allocated ranges
//! Phase 4: String dedup, remap, index build, edge bulk insert
//!
//! # Phase 3 Architecture
//!
//! Phase 3 uses `split_at_mut` to carve disjoint sub-slices from pre-allocated
//! arena and interner ranges, then uses `rayon` to commit each file's staging
//! graph in parallel without locks:
//!
//! ```text
//! NodeArena slots:  [    file0     |    file1     |   file2   ]
//! StringInterner:   [    file0     |    file1     |   file2   ]
//!                          ↑              ↑             ↑
//!                    split_at_mut   split_at_mut   remainder
//! ```
//!
//! Each file's `commit_single_file` receives its own disjoint slices and
//! operates independently without contention.
23
24use std::collections::HashMap;
25use std::ops::Range;
26use std::sync::Arc;
27
28use rayon::prelude::*;
29
30use crate::graph::unified::edge::delta::{DeltaEdge, DeltaOp};
31#[cfg(test)]
32use crate::graph::unified::edge::kind::ResolvedVia;
33use crate::graph::unified::edge::kind::{EdgeKind, MqProtocol};
34use crate::graph::unified::file::FileId;
35use crate::graph::unified::node::NodeId;
36use crate::graph::unified::storage::NodeArena;
37use crate::graph::unified::storage::arena::{NodeEntry, Slot};
38use crate::graph::unified::storage::c_indirect::LocalScopeIndex;
39use crate::graph::unified::string::StringId;
40
41use super::pass3_intra::PendingEdge;
42use super::staging::{
43 GoEmbeddingHint, GoFunctionSignatureHint, GoMethodReceiverHint, GoMethodSignatureHint,
44 GoNamedTypeConversionHint, GoReceiverCallHint, GoReceiverHintKind, PendingBinding,
45 PendingIndirectCallsite, StagingGraph, StagingOp,
46};
47
/// Cursor pair threaded from one chunk to the next so that ID assignment
/// stays deterministic.
///
/// A chunk's ranges start exactly where the preceding chunk's ranges
/// stopped, which keeps the node and string ID spaces contiguous and
/// globally unique.
#[derive(Clone, Debug, Default)]
pub struct GlobalOffsets {
    /// Index of the next node slot that has not been handed out yet.
    pub node_offset: u32,
    /// Index of the next string slot that has not been handed out yet.
    pub string_offset: u32,
}
59
60/// Per-file commit plan with pre-assigned ID ranges.
61#[derive(Debug, Clone)]
62pub struct FilePlan {
63 /// Index into the chunk's `ParsedFile` vec.
64 pub parsed_index: usize,
65 /// Pre-assigned `FileId` from batch registration.
66 pub file_id: FileId,
67 /// Node slot range [start..end) in `NodeArena`.
68 pub node_range: Range<u32>,
69 /// String slot range [start..end) in `StringInterner`.
70 pub string_range: Range<u32>,
71}
72
73/// Plan for parallel commit of a single chunk.
74#[derive(Debug, Clone)]
75pub struct ChunkCommitPlan {
76 /// Per-file plans in deterministic file order.
77 pub file_plans: Vec<FilePlan>,
78 /// Total nodes across all files in this chunk.
79 pub total_nodes: u32,
80 /// Total strings across all files in this chunk.
81 pub total_strings: u32,
82 /// Total edges across all files in this chunk.
83 pub total_edges: u64,
84}
85
86/// Compute commit plan from parsed files using prefix-sum range assignment.
87///
88/// Each file gets contiguous, non-overlapping ranges for nodes and strings.
89/// Ranges start from the given global offsets, which carry forward across
90/// chunks.
91///
92/// # Arguments
93///
94/// * `node_counts` - Per-file node counts (from `StagingGraph::node_count_u32()`)
95/// * `string_counts` - Per-file string counts
96/// * `edge_counts` - Per-file edge counts (used for `total_edges` only)
97/// * `file_ids` - Pre-assigned `FileId`s from batch registration
98/// * `node_offset` - Running global node offset across chunks
99/// * `string_offset` - Running global string offset across chunks
100///
101/// # Panics
102///
103/// Panics in debug builds if the per-chunk accounting arrays do not have
104/// identical lengths.
105#[must_use]
106pub fn compute_commit_plan(
107 node_counts: &[u32],
108 string_counts: &[u32],
109 edge_counts: &[u32],
110 file_ids: &[FileId],
111 node_offset: u32,
112 string_offset: u32,
113) -> ChunkCommitPlan {
114 debug_assert_eq!(node_counts.len(), string_counts.len());
115 debug_assert_eq!(node_counts.len(), edge_counts.len());
116 debug_assert_eq!(node_counts.len(), file_ids.len());
117
118 let mut plans = Vec::with_capacity(node_counts.len());
119 let mut node_cursor = node_offset;
120 let mut string_cursor = string_offset;
121 let mut total_edges: u64 = 0;
122
123 for i in 0..node_counts.len() {
124 let nc = node_counts[i];
125 let sc = string_counts[i];
126
127 let node_end = node_cursor
128 .checked_add(nc)
129 .expect("node ID space overflow in commit plan");
130 let string_end = string_cursor
131 .checked_add(sc)
132 .expect("string ID space overflow in commit plan");
133
134 plans.push(FilePlan {
135 parsed_index: i,
136 file_id: file_ids[i],
137 node_range: node_cursor..node_end,
138 string_range: string_cursor..string_end,
139 });
140
141 node_cursor = node_end;
142 string_cursor = string_end;
143 total_edges += u64::from(edge_counts[i]);
144 }
145
146 ChunkCommitPlan {
147 file_plans: plans,
148 total_nodes: node_cursor - node_offset,
149 total_strings: string_cursor - string_offset,
150 total_edges,
151 }
152}
153
154/// Execute Phase 2: count + range assignment for a parsed chunk.
155///
156/// Extracts per-file counts from staging graphs and delegates to
157/// [`compute_commit_plan`] for prefix-sum range assignment.
158#[must_use]
159pub fn phase2_assign_ranges(
160 staging_graphs: &[&StagingGraph],
161 file_ids: &[FileId],
162 offsets: &GlobalOffsets,
163) -> ChunkCommitPlan {
164 let node_counts: Vec<u32> = staging_graphs
165 .iter()
166 .map(|sg| sg.node_count_u32())
167 .collect();
168 let string_counts: Vec<u32> = staging_graphs
169 .iter()
170 .map(|sg| sg.string_count_u32())
171 .collect();
172 let edge_counts: Vec<u32> = staging_graphs
173 .iter()
174 .map(|sg| sg.edge_count_u32())
175 .collect();
176
177 compute_commit_plan(
178 &node_counts,
179 &string_counts,
180 &edge_counts,
181 file_ids,
182 offsets.node_offset,
183 offsets.string_offset,
184 )
185}
186
187/// Phase 3 result: per-file edges, per-file node IDs, and total written
188/// counts for validation.
189pub struct Phase3Result {
190 /// Per-file edge collections for Phase 4 bulk insert.
191 pub per_file_edges: Vec<Vec<PendingEdge>>,
192 /// Per-file node IDs actually committed. Indexed identically to
193 /// `per_file_edges` — element `i` is the Vec of NodeIds committed
194 /// for `plan.file_plans[i]`. Empty Vec when that file wrote no
195 /// nodes (slot overflow skip, or a staging graph with only strings).
196 ///
197 /// Used by the caller to populate
198 /// [`crate::graph::unified::storage::registry::FileRegistry::record_node`],
199 /// which feeds the Gate 0c bucket-bijection debug invariant.
200 pub per_file_node_ids: Vec<Vec<NodeId>>,
201 /// Total nodes actually written (for validation against planned totals).
202 pub total_nodes_written: usize,
203 /// Total strings actually written (for validation against planned totals).
204 pub total_strings_written: usize,
205 /// Total edges collected across all files.
206 pub total_edges_collected: usize,
207 /// Per-chunk drained C indirect-call staging payloads (DESIGN §8.2).
208 ///
209 /// Populated when any file in the chunk staged a
210 /// [`super::staging::CIndirectStagingPayload`] (C plugin Phase 1, U10).
211 /// `None` for chunks containing no C files, keeping the wire-shape
212 /// budget unchanged for non-C workspaces.
213 ///
214 /// Consumed by [`apply_c_indirect_drain`] from `entrypoint.rs` after
215 /// Phase 4c-prime cross-file unification rebuilds the qualified-name
216 /// index — see U11 plumbing for the full Phase 3 → Phase 4 hand-off.
217 pub c_indirect_drain: Option<PhaseCIndirectDrain>,
218}
219
220/// Drained C indirect-call staging payload, resolved to owned `String`s.
221///
222/// The per-file
223/// [`super::staging::CIndirectStagingPayload`] contains:
224/// * `pending_address_taken_names: Vec<StringId>` — staging-local string
225/// ids that we resolve to owned `String`s via `staging.resolve_local_string`
226/// here so the post-4c-prime applier can re-intern through the canonical
227/// interner without holding any staging-graph reference;
228/// * `pending_struct_field_signatures: Vec<(String, String, String)>` —
229/// already owned;
230/// * `pending_bindings: Vec<PendingBinding>` — already owned;
231/// * `pending_indirect_callsites: Vec<PendingIndirectCallsite>` — already
232/// owned (carrier-side stamping of `FileId` happens here so the applier
233/// does not need per-file context);
234/// * `local_scope_index: Option<LocalScopeIndex>` — moved verbatim.
235///
236/// The applier ([`apply_c_indirect_drain`]) interns the owned strings into
237/// the **post-Phase-4a-dedup** graph interner, resolves names to canonical
238/// `NodeId`s via [`crate::graph::unified::storage::indices::AuxiliaryIndices::by_qualified_name`]
239/// (with a `by_name` fallback for languages whose canonical qualified name
240/// equals the semantic name and therefore leaves
241/// [`NodeEntry::qualified_name`] unset — e.g. C, where `cb_alpha` is its
242/// own qualified name), and writes them into
243/// [`CodeGraph::c_indirect_tables_mut`].
244///
245/// Per DESIGN §8.2, this drain bridges the parallel-parse-and-commit
246/// boundary (Phase 3) to the post-unification application step (Phase 4
247/// finalisation, just after Phase 4c-prime returns).
248#[derive(Debug, Default)]
249pub struct PhaseCIndirectDrain {
250 /// Address-taken function qualified-name entries to mark post-unification.
251 ///
252 /// Each entry pairs the bare/qualified function name the C plugin
253 /// captured in `helper.mark_function_address_taken_by_name(...)` with
254 /// the source `FileId` (always a C-language file by construction —
255 /// only the C plugin populates `CIndirectStagingPayload`).
256 ///
257 /// Per DESIGN §8.2 lines 1239-1241: "A pending list of
258 /// `(function_qualified_name, file_id)` for address-taken marks". The
259 /// `file_id` is the *origin* file (where the address-take site lives),
260 /// not the file of the resolved callable target. It is carried so the
261 /// applier can constrain the workspace-global `by_name` fallback in
262 /// [`crate::graph::unified::build::entrypoint::apply_deferred_address_taken_marks`]
263 /// to candidate nodes whose own owning file's language is `C` — a
264 /// non-C namesake (e.g. a Rust `fn cb_alpha`) must NOT be marked by
265 /// the C-scoped contract of SPEC §3.1.2.
266 ///
267 /// Duplicates on `function_qualified_name` are tolerated —
268 /// [`crate::graph::unified::storage::metadata::NodeMetadataStore::mark_address_taken`]
269 /// is idempotent.
270 pub address_taken_names: Vec<DeferredAddressTakenEntry>,
271 /// `(struct_tag, field_name, signature)` triples — DESIGN §3.2.2.
272 ///
273 /// Drained verbatim from the staging payload. The applier interns each
274 /// leg via `graph.strings_mut().intern(...)` and inserts into
275 /// `CIndirectSideTables::struct_field_fnptr`.
276 pub struct_field_signatures: Vec<(String, String, String)>,
277 /// Binding-plane entries (DESIGN §7.1) paired with their origin `FileId`.
278 ///
279 /// The applier resolves `instance_name` and `target_fn_name` to
280 /// canonical `NodeId`s and inserts a [`BindingEntry`] under the
281 /// interned `(struct_tag, field_name)` key in
282 /// `CIndirectSideTables::bindings_by_field`. The `FileId` is the
283 /// origin file (the C TU that staged the binding), retained for the
284 /// same C-language-scoped fallback rationale as
285 /// [`Self::address_taken_names`].
286 pub bindings: Vec<(FileId, PendingBinding)>,
287 /// Indirect callsites paired with their owning `FileId`. The applier
288 /// resolves `caller_qualified_name` to a `NodeId` and pushes an
289 /// [`IndirectCallsite`] onto `CIndirectSideTables::pending_callsites`.
290 /// `FileId` is stamped here from the per-file `FilePlan` so the applier
291 /// does not need per-file context.
292 pub indirect_callsites: Vec<(FileId, PendingIndirectCallsite)>,
293 /// Per-file block-scope arenas (DESIGN §4.1). Moved verbatim into
294 /// `CIndirectSideTables::local_scope_indices` keyed by `FileId`.
295 pub local_scope_indices: Vec<(FileId, LocalScopeIndex)>,
296}
297
298/// One deferred address-taken mark, carrying the origin `FileId`
299/// alongside the qualified function name (DESIGN §8.2 lines 1239-1241).
300///
301/// The origin `FileId` is always a C-language file by construction (only
302/// the C plugin populates `CIndirectStagingPayload`). It is retained on
303/// the drain so the post-unification applier can constrain the
304/// workspace-global `by_name` fallback to candidate nodes whose own
305/// owning file's language is `C` — defending against the SPEC §3.1.2
306/// "Every C `NodeKind::Function`" contract being widened to mark
307/// same-named non-C nodes (e.g. Rust `fn cb_alpha`, Python `def
308/// cb_alpha`) that happen to share a bare name with a C symbol.
309#[derive(Debug, Clone, PartialEq, Eq)]
310pub struct DeferredAddressTakenEntry {
311 /// Qualified function name as captured by
312 /// `helper.mark_function_address_taken_by_name(...)`.
313 pub function_qualified_name: String,
314 /// Origin C file that staged this address-taken site. Used only as
315 /// metadata for DESIGN §8.2 conformance and provenance — the
316 /// candidate-language filter in the applier compares each
317 /// candidate's owning-file language to `Language::C`, not to this
318 /// `file_id` directly (cross-TU address-takes are legal: a
319 /// `cb_alpha` declared in `a.c` may have its address taken in
320 /// `b.c`).
321 pub file_id: FileId,
322}
323
324impl PhaseCIndirectDrain {
325 /// Returns `true` when every drained vec/map is empty.
326 ///
327 /// Used by the chunk-accumulator in `entrypoint.rs` to skip Phase 4
328 /// application entirely for non-C workspaces, keeping the
329 /// `CodeGraph.c_indirect_tables` slot at its default `None`.
330 #[must_use]
331 pub fn is_empty(&self) -> bool {
332 self.address_taken_names.is_empty()
333 && self.struct_field_signatures.is_empty()
334 && self.bindings.is_empty()
335 && self.indirect_callsites.is_empty()
336 && self.local_scope_indices.is_empty()
337 }
338
339 /// Merge another drain into this one, taking ownership of its contents.
340 ///
341 /// Used by the chunk-loop in `entrypoint.rs` to accumulate per-chunk
342 /// drains into a single workspace-global drain before invoking
343 /// [`apply_c_indirect_drain`].
344 pub fn merge(&mut self, mut other: PhaseCIndirectDrain) {
345 self.address_taken_names
346 .append(&mut other.address_taken_names);
347 self.struct_field_signatures
348 .append(&mut other.struct_field_signatures);
349 self.bindings.append(&mut other.bindings);
350 self.indirect_callsites
351 .append(&mut other.indirect_callsites);
352 self.local_scope_indices
353 .append(&mut other.local_scope_indices);
354 }
355}
356
357/// Execute Phase 3: parallel commit into disjoint pre-allocated ranges.
358///
359/// Pre-splits arena and interner slices into per-file disjoint sub-slices
360/// using `split_at_mut`, then uses `rayon` `par_iter` for lock-free parallel
361/// writes. Each file's staging graph is committed independently.
362///
363/// Returns [`Phase3Result`] with per-file edges and written counts so the
364/// caller can validate against plan totals and truncate allocations on
365/// mismatch.
366///
367/// # Parameterisation over the mutation target
368///
369/// As of Task 4 Step 4 Phase 1, this function is generic over
370/// `G: GraphMutationTarget`. At the full-build call site in
371/// `build_unified_graph_inner` the target is `CodeGraph`; at the
372/// Task 4 Step 4 Phase 2+ incremental rebuild call site the target
373/// will be `RebuildGraph`. Both impls live in
374/// [`crate::graph::unified::mutation_target`]; see that module's
375/// docs for the field-coverage contract.
376///
377/// The function accesses exactly two fields via the trait —
378/// [`GraphMutationTarget::nodes_and_strings_mut`] — and pre-splits
379/// those two slices for the per-file parallel commit. Every other
380/// piece of the pipeline (the CSR/delta edge store, auxiliary
381/// indices, file registry, etc.) is untouched by this helper: the
382/// `PendingEdge` vectors in the returned [`Phase3Result`] are
383/// threaded through to Phase 4d (`pending_edges_to_delta` +
384/// `BidirectionalEdgeStore::add_edges_bulk_ordered`) by the caller.
385///
386/// # Panics
387///
388/// Panics if `plan.total_nodes` or `plan.total_strings` exceeds the
389/// pre-allocated range in the arena or interner.
390#[must_use]
391pub(crate) fn phase3_parallel_commit<
392 G: crate::graph::unified::mutation_target::GraphMutationTarget,
393>(
394 plan: &ChunkCommitPlan,
395 staging_graphs: &[&StagingGraph],
396 graph: &mut G,
397) -> Phase3Result {
398 if plan.file_plans.is_empty() {
399 return Phase3Result {
400 per_file_edges: Vec::new(),
401 per_file_node_ids: Vec::new(),
402 total_nodes_written: 0,
403 total_strings_written: 0,
404 total_edges_collected: 0,
405 c_indirect_drain: None,
406 };
407 }
408
409 // Determine the start of the pre-allocated ranges.
410 let node_start = plan.file_plans[0].node_range.start;
411 let string_start = plan.file_plans[0].string_range.start;
412
413 // Borrow the arena and interner disjointly via the mutation-plane
414 // trait. This is the one-and-only field access this helper makes
415 // on the graph; every downstream step operates on the resulting
416 // slices without revisiting `graph`.
417 let (arena, interner) = graph.nodes_and_strings_mut();
418
419 // Get mutable slices covering the entire pre-allocated region.
420 let node_slice = arena.bulk_slice_mut(node_start, plan.total_nodes);
421 let (str_slice, rc_slice) = interner.bulk_slices_mut(string_start, plan.total_strings);
422
423 // Pre-split into per-file disjoint sub-slices using split_at_mut.
424 let mut node_remaining = &mut *node_slice;
425 let mut str_remaining = &mut *str_slice;
426 let mut rc_remaining = &mut *rc_slice;
427
428 #[allow(clippy::type_complexity)]
429 let mut file_work: Vec<(
430 &mut [Slot<NodeEntry>],
431 &mut [Option<Arc<str>>],
432 &mut [u32],
433 &FilePlan,
434 usize,
435 )> = Vec::with_capacity(plan.file_plans.len());
436
437 for (i, file_plan) in plan.file_plans.iter().enumerate() {
438 let nc = (file_plan.node_range.end - file_plan.node_range.start) as usize;
439 let sc = (file_plan.string_range.end - file_plan.string_range.start) as usize;
440
441 let (n, nr) = node_remaining.split_at_mut(nc);
442 let (s, sr) = str_remaining.split_at_mut(sc);
443 let (r, rr) = rc_remaining.split_at_mut(sc);
444
445 file_work.push((n, s, r, file_plan, i));
446 node_remaining = nr;
447 str_remaining = sr;
448 rc_remaining = rr;
449 }
450
451 // Parallel commit — each closure owns disjoint slices, no contention.
452 let results: Vec<FileCommitResult> = file_work
453 .into_par_iter()
454 .map(|(node_slots, str_slots, rc_slots, file_plan, idx)| {
455 commit_single_file(
456 staging_graphs[idx],
457 file_plan,
458 node_slots,
459 str_slots,
460 rc_slots,
461 )
462 })
463 .collect();
464
465 let total_nodes_written: usize = results.iter().map(|r| r.nodes_written).sum();
466 let total_strings_written: usize = results.iter().map(|r| r.strings_written).sum();
467 let total_edges_collected: usize = results.iter().map(|r| r.edges.len()).sum();
468 let mut per_file_edges = Vec::with_capacity(results.len());
469 let mut per_file_node_ids = Vec::with_capacity(results.len());
470
471 // Cluster B3 (Go T1): aggregate per-file remapped Go hints. Each
472 // file's hints have already been remapped through that file's
473 // local→global NodeId / StringId tables inside `commit_single_file`,
474 // so the merge into the live target is a straightforward extend.
475 let mut all_embedding_hints: Vec<GoEmbeddingHint> = Vec::new();
476 let mut all_named_type_conversion_hints: Vec<GoNamedTypeConversionHint> = Vec::new();
477 let mut all_receiver_call_hints: Vec<GoReceiverCallHint> = Vec::new();
478 // Cluster D2.1: receiver-pointerness per Go method declaration. Same
479 // commit-time NodeId / StringId remap discipline as the other three
480 // vectors above; the consumer is Cluster D2's T1.1 pass and D2's
481 // tightening of D1's bucket classifier.
482 let mut all_method_receiver_hints: Vec<GoMethodReceiverHint> = Vec::new();
483 // Cluster D3 (Go T1): canonical signatures per Go method / function
484 // / named function-type declaration. Same remap discipline as the
485 // four hint vectors above. Consumer is the tightened T1.1
486 // satisfaction predicate and the new T1.3 function-signature
487 // implementation pass.
488 let mut all_method_signature_hints: Vec<GoMethodSignatureHint> = Vec::new();
489 let mut all_function_signature_hints: Vec<GoFunctionSignatureHint> = Vec::new();
490
491 for r in results {
492 per_file_edges.push(r.edges);
493 per_file_node_ids.push(r.node_ids);
494 all_embedding_hints.extend(r.embedding_hints);
495 all_named_type_conversion_hints.extend(r.named_type_conversion_hints);
496 all_receiver_call_hints.extend(r.receiver_call_hints);
497 all_method_receiver_hints.extend(r.method_receiver_hints);
498 all_method_signature_hints.extend(r.method_signature_hints);
499 all_function_signature_hints.extend(r.function_signature_hints);
500 }
501
502 // Cluster B3 / D2.1 / D3: merge aggregated hints into the live
503 // target. This closes the deferred wire-through noted in Cluster A:
504 // every per-file `StagingGraph::go_hints` now lands in the live
505 // target's `GoHints` buffer during Phase 3, with all NodeId /
506 // StringId references remapped to global identities. The
507 // post-Phase-4e `pass_go_method_set_satisfaction` will drain this
508 // buffer.
509 if !all_embedding_hints.is_empty()
510 || !all_named_type_conversion_hints.is_empty()
511 || !all_receiver_call_hints.is_empty()
512 || !all_method_receiver_hints.is_empty()
513 || !all_method_signature_hints.is_empty()
514 || !all_function_signature_hints.is_empty()
515 {
516 let go_hints = graph.go_hints_mut();
517 go_hints.embeddings.extend(all_embedding_hints);
518 go_hints
519 .named_type_conversions
520 .extend(all_named_type_conversion_hints);
521 go_hints.receiver_calls.extend(all_receiver_call_hints);
522 go_hints.method_receivers.extend(all_method_receiver_hints);
523 go_hints
524 .method_signatures
525 .extend(all_method_signature_hints);
526 go_hints
527 .function_signatures
528 .extend(all_function_signature_hints);
529 }
530
531 // --- C indirect-call drain (DESIGN §8.2 / U11) ---
532 //
533 // Sequentially walk the per-file staging graphs and drain each
534 // `CIndirectStagingPayload` into a single per-chunk
535 // [`PhaseCIndirectDrain`]. Sequential (not parallel) because: (a) the
536 // payloads are typically tiny — even a large C TU stages ~tens of
537 // bindings + ~dozens of callsites — and (b) the address-taken names
538 // need their staging-local `StringId`s resolved to owned strings while
539 // we still hold the source `StagingGraph` reference; once Phase 3
540 // returns, the chunk-local `ParsedFile`s drop and the staged strings
541 // become unrecoverable.
542 //
543 // Local-string resolution: `pending_address_taken_names` contains
544 // staging-local `StringId`s interned by `helper.intern(name)` (see
545 // `helper::mark_function_address_taken_by_name`). The applier needs
546 // the underlying `&str` to re-intern through the **post-dedup** graph
547 // interner, so we resolve here via `staging.resolve_local_string`.
548 // The string already had its `intern` ref-count bumped on stage —
549 // the post-4c-prime applier's re-intern produces the canonical global
550 // `StringId` independent of the staging-local id.
551 let c_indirect_drain = collect_c_indirect_drain(plan, staging_graphs);
552
553 Phase3Result {
554 per_file_edges,
555 per_file_node_ids,
556 total_nodes_written,
557 total_strings_written,
558 total_edges_collected,
559 c_indirect_drain,
560 }
561}
562
563/// Drain per-file C indirect-call staging payloads from the chunk.
564///
565/// Returns `Some(PhaseCIndirectDrain)` when at least one file in the chunk
566/// staged a `CIndirectStagingPayload`; otherwise `None` (non-C workspaces).
567/// Sequential rather than parallel — see commentary in
568/// [`phase3_parallel_commit`] for the rationale.
569fn collect_c_indirect_drain(
570 plan: &ChunkCommitPlan,
571 staging_graphs: &[&StagingGraph],
572) -> Option<PhaseCIndirectDrain> {
573 debug_assert_eq!(plan.file_plans.len(), staging_graphs.len());
574
575 let mut drain = PhaseCIndirectDrain::default();
576
577 for (file_plan, staging) in plan.file_plans.iter().zip(staging_graphs.iter()) {
578 let Some(payload) = staging.c_indirect() else {
579 continue;
580 };
581
582 // Resolve local string ids → owned Strings for address-taken names.
583 // A `None` from `resolve_local_string` would indicate a staging-API
584 // misuse (a non-local id was pushed). Skip with a warn rather than
585 // panic so a buggy plugin can't take down the build pipeline.
586 //
587 // Each captured entry pairs the resolved name with the origin
588 // `file_plan.file_id` (DESIGN §8.2 lines 1239-1241), allowing the
589 // post-unification applier to scope the workspace-global by_name
590 // fallback to C-language nodes.
591 for &local_id in &payload.pending_address_taken_names {
592 match staging.resolve_local_string(local_id) {
593 Some(name) => drain.address_taken_names.push(DeferredAddressTakenEntry {
594 function_qualified_name: name.to_owned(),
595 file_id: file_plan.file_id,
596 }),
597 None => log::warn!(
598 "Phase 3 C-indirect drain: address-taken name local string id \
599 {:?} did not resolve in staging graph for file {:?} — skipping. \
600 This indicates the C plugin staged a non-local StringId via \
601 helper.mark_function_address_taken_by_name.",
602 local_id,
603 file_plan.file_id,
604 ),
605 }
606 }
607
608 // `pending_struct_field_signatures` is already `Vec<(String, String, String)>`
609 // — clone the triple set into the drain. We clone (rather than
610 // mutate-take) because `staging` is a `&StagingGraph` shared borrow.
611 drain
612 .struct_field_signatures
613 .extend(payload.pending_struct_field_signatures.iter().cloned());
614
615 // `pending_bindings` is `Vec<PendingBinding>` (owned Strings).
616 // Stamp each binding with its origin `file_plan.file_id` so the
617 // post-unification applier can scope the by_name fallback for
618 // `instance_name` / `target_fn_name` resolution to C-language
619 // nodes (same rationale as `address_taken_names` above).
620 drain.bindings.extend(
621 payload
622 .pending_bindings
623 .iter()
624 .cloned()
625 .map(|b| (file_plan.file_id, b)),
626 );
627
628 // Stamp each indirect callsite with its FileId from the plan, then
629 // append. The applier needs the FileId to construct the persisted
630 // `IndirectCallsite` (which carries `caller: NodeId` + `file_id:
631 // FileId` rather than staging's `caller_qualified_name: String`).
632 drain.indirect_callsites.extend(
633 payload
634 .pending_indirect_callsites
635 .iter()
636 .cloned()
637 .map(|cs| (file_plan.file_id, cs)),
638 );
639
640 // Move the per-file scope index by clone (we hold `&StagingGraph`,
641 // so cannot take). `LocalScopeIndex: Clone` — see
642 // `c_indirect/scope_index.rs:21` documentation header.
643 if let Some(scope_index) = payload.local_scope_index.as_ref() {
644 drain
645 .local_scope_indices
646 .push((file_plan.file_id, scope_index.clone()));
647 }
648 }
649
650 if drain.is_empty() { None } else { Some(drain) }
651}
652
653/// Commit a single file's staging graph into pre-allocated disjoint ranges.
654///
655/// This function operates on slices that belong exclusively to this file,
656/// so it requires no locks or synchronization.
657///
658/// # Steps
659///
660/// 1. **Strings**: Extract `InternString` ops, write `Arc<str>` values into
661/// pre-allocated string slots, build local→global `StringId` remap.
662/// 2. **Nodes**: Extract `AddNode` ops, apply string remap to each `NodeEntry`,
663/// set `file_id`, write into pre-allocated node slots, build expected→actual
664/// `NodeId` remap.
665/// 3. **Edges**: Extract `AddEdge` ops, apply node ID remap to source/target,
666/// assign pre-computed sequence numbers, return as `PendingEdge` vec.
667// Result of committing a single file: edges + committed NodeIds + actual written counts.
668struct FileCommitResult {
669 edges: Vec<PendingEdge>,
670 /// Every `NodeId` committed into the arena for this file, in
671 /// commit order. Used by the sequential post-commit step that
672 /// populates `FileRegistry::per_file_nodes`.
673 node_ids: Vec<NodeId>,
674 nodes_written: usize,
675 strings_written: usize,
676 /// Cluster B3 (Go T1 implements-and-promotion): per-file
677 /// [`GoEmbeddingHint`] / [`GoNamedTypeConversionHint`] /
678 /// [`GoReceiverCallHint`] entries, with their staging-local
679 /// `NodeId` / `StringId` fields remapped to global identities via
680 /// the same tables that drive node + edge commit. The sequential
681 /// post-rayon step in [`phase3_parallel_commit`] aggregates these
682 /// across files and flushes the result into
683 /// [`crate::graph::unified::mutation_target::GraphMutationTarget::go_hints_mut`].
684 ///
685 /// Non-Go staging graphs leave all four vectors empty — no work
686 /// is performed for them.
687 embedding_hints: Vec<GoEmbeddingHint>,
688 named_type_conversion_hints: Vec<GoNamedTypeConversionHint>,
689 receiver_call_hints: Vec<GoReceiverCallHint>,
690 /// Cluster D2.1: per-method receiver-pointerness hints recovered from
691 /// the Go plugin's Phase-1 method emission sites. `method_node` and
692 /// `receiver_type_qualified_name` are remapped via the per-file
693 /// remap tables in the same `commit_single_file` step that drives
694 /// the other three hint vectors.
695 method_receiver_hints: Vec<GoMethodReceiverHint>,
696 /// Cluster D3: canonical-signature hints for Go method declarations
697 /// (top-level methods and interface methods). `method_node` is
698 /// remapped via the per-file node table. `canonical_signature` is a
699 /// plain `String` so no `StringId` remap is needed.
700 method_signature_hints: Vec<GoMethodSignatureHint>,
701 /// Cluster D3: canonical-signature hints for Go function
702 /// declarations and named function-type declarations. `function_node`
703 /// is remapped via the per-file node table; `canonical_signature` is
704 /// plain text.
705 function_signature_hints: Vec<GoFunctionSignatureHint>,
706}
707
708fn commit_single_file(
709 staging: &StagingGraph,
710 plan: &FilePlan,
711 node_slots: &mut [Slot<NodeEntry>],
712 str_slots: &mut [Option<Arc<str>>],
713 rc_slots: &mut [u32],
714) -> FileCommitResult {
715 let ops = staging.operations();
716
717 // --- Step 1: Write strings, build local→global remap ---
718 let (string_remap, strings_written) = write_strings(ops, plan, str_slots, rc_slots);
719
720 // --- Step 2: Write nodes, build expected→actual node ID remap ---
721 let (node_remap, nodes_written, node_ids) = write_nodes(ops, plan, node_slots, &string_remap);
722
723 // --- Step 3: Collect remapped edges with pre-assigned sequence numbers ---
724 let edges = collect_edges(ops, plan, &node_remap, &string_remap);
725
726 // --- Step 4 (Cluster B3 / D2.1 / D3): Remap Go side-channel hints ---
727 //
728 // The Go plugin captures staging-local NodeId / StringId values in
729 // GoHints during Phase-1 parse. Both ID spaces are file-local until
730 // Phase 3 writes the file's nodes and strings into the globally
731 // assigned ranges; once node_remap / string_remap exist, every hint
732 // gets the same identity rewrite as a PendingEdge.
733 let RemappedGoHints {
734 embeddings: embedding_hints,
735 named_type_conversions: named_type_conversion_hints,
736 receiver_calls: receiver_call_hints,
737 method_receivers: method_receiver_hints,
738 method_signatures: method_signature_hints,
739 function_signatures: function_signature_hints,
740 } = remap_go_hints(staging, &node_remap, &string_remap, plan);
741
742 FileCommitResult {
743 edges,
744 node_ids,
745 nodes_written,
746 strings_written,
747 embedding_hints,
748 named_type_conversion_hints,
749 receiver_call_hints,
750 method_receiver_hints,
751 method_signature_hints,
752 function_signature_hints,
753 }
754}
755
/// Bundle returned by [`remap_go_hints`] so the per-file commit step can
/// destructure without juggling a tuple of six vectors. Each field
/// matches its sibling on [`FileCommitResult`].
///
/// Every hint stored here has already had its `NodeId` / `StringId`
/// fields passed through the per-file remap tables and its `file` field
/// set to the committing file's `FileId`.
struct RemappedGoHints {
    /// Remapped struct-embedding hints.
    embeddings: Vec<GoEmbeddingHint>,
    /// Remapped named-type conversion hints.
    named_type_conversions: Vec<GoNamedTypeConversionHint>,
    /// Remapped receiver-call hints.
    receiver_calls: Vec<GoReceiverCallHint>,
    /// Remapped method-receiver hints.
    method_receivers: Vec<GoMethodReceiverHint>,
    /// Remapped method canonical-signature hints.
    method_signatures: Vec<GoMethodSignatureHint>,
    /// Remapped function canonical-signature hints.
    function_signatures: Vec<GoFunctionSignatureHint>,
}
767
768/// Remap a `NodeId` through the per-file node-remap table.
769///
770/// Hint construction in the plugin uses staging-local NodeIds (assigned
771/// by `StagingGraph::add_node` / equivalents). After Phase 3 commit the
772/// canonical NodeId for each staged node lives in `node_remap`; the
773/// remap is identity for already-global IDs.
774fn remap_node_id_hint(id: NodeId, node_remap: &HashMap<NodeId, NodeId>) -> NodeId {
775 node_remap.get(&id).copied().unwrap_or(id)
776}
777
778/// Remap a `StringId` through the per-file string-remap table.
779///
780/// Local-tagged staging StringIds are mapped to their global slot ID;
781/// already-global IDs are passed through unchanged.
782fn remap_string_id_hint(id: StringId, string_remap: &HashMap<StringId, StringId>) -> StringId {
783 if id.is_local() {
784 string_remap.get(&id).copied().unwrap_or(id)
785 } else {
786 id
787 }
788}
789
790/// Drain the staging graph's Go hint vectors, remap each entry's
791/// staging-local `NodeId` / `StringId` fields through the per-file
792/// remap tables built by Phase 3 commit, and return four globally-
793/// addressable vectors ready to be merged into the live target.
794///
795/// Non-Go staging graphs return empty vectors with no allocations
796/// beyond the empty `Vec::new()` headers.
797fn remap_go_hints(
798 staging: &StagingGraph,
799 node_remap: &HashMap<NodeId, NodeId>,
800 string_remap: &HashMap<StringId, StringId>,
801 plan: &FilePlan,
802) -> RemappedGoHints {
803 let hints = staging.go_hints();
804 if hints.embeddings.is_empty()
805 && hints.named_type_conversions.is_empty()
806 && hints.receiver_calls.is_empty()
807 && hints.method_receivers.is_empty()
808 && hints.method_signatures.is_empty()
809 && hints.function_signatures.is_empty()
810 {
811 return RemappedGoHints {
812 embeddings: Vec::new(),
813 named_type_conversions: Vec::new(),
814 receiver_calls: Vec::new(),
815 method_receivers: Vec::new(),
816 method_signatures: Vec::new(),
817 function_signatures: Vec::new(),
818 };
819 }
820
821 let embeddings: Vec<GoEmbeddingHint> = hints
822 .embeddings
823 .iter()
824 .map(|h| GoEmbeddingHint {
825 outer: remap_node_id_hint(h.outer, node_remap),
826 inner_qualified_name: remap_string_id_hint(h.inner_qualified_name, string_remap),
827 pointerness: h.pointerness,
828 file: plan.file_id,
829 })
830 .collect();
831
832 let named_type_conversions: Vec<GoNamedTypeConversionHint> = hints
833 .named_type_conversions
834 .iter()
835 .map(|h| GoNamedTypeConversionHint {
836 call_site: remap_node_id_hint(h.call_site, node_remap),
837 target_type_qualified_name: remap_string_id_hint(
838 h.target_type_qualified_name,
839 string_remap,
840 ),
841 argument_node: remap_node_id_hint(h.argument_node, node_remap),
842 file: plan.file_id,
843 })
844 .collect();
845
846 let receiver_calls: Vec<GoReceiverCallHint> = hints
847 .receiver_calls
848 .iter()
849 .map(|h| GoReceiverCallHint {
850 call_site: remap_node_id_hint(h.call_site, node_remap),
851 callee_method: remap_node_id_hint(h.callee_method, node_remap),
852 method_name: remap_string_id_hint(h.method_name, string_remap),
853 receiver: match &h.receiver {
854 GoReceiverHintKind::LocalIdent { binding_local } => {
855 GoReceiverHintKind::LocalIdent {
856 binding_local: remap_node_id_hint(*binding_local, node_remap),
857 }
858 }
859 // The Type-/Pointer-Prefixed and CallReturn variants
860 // carry plain `String` text — no remap required.
861 GoReceiverHintKind::TypePrefixed { type_text } => {
862 GoReceiverHintKind::TypePrefixed {
863 type_text: type_text.clone(),
864 }
865 }
866 GoReceiverHintKind::PointerPrefixed { type_text } => {
867 GoReceiverHintKind::PointerPrefixed {
868 type_text: type_text.clone(),
869 }
870 }
871 GoReceiverHintKind::CallReturn { callee_qn } => GoReceiverHintKind::CallReturn {
872 callee_qn: callee_qn.clone(),
873 },
874 },
875 argument_count: h.argument_count,
876 is_async: h.is_async,
877 file: plan.file_id,
878 })
879 .collect();
880
881 let method_receivers: Vec<GoMethodReceiverHint> = hints
882 .method_receivers
883 .iter()
884 .map(|h| GoMethodReceiverHint {
885 method_node: remap_node_id_hint(h.method_node, node_remap),
886 receiver_type_qualified_name: remap_string_id_hint(
887 h.receiver_type_qualified_name,
888 string_remap,
889 ),
890 receiver_pointerness: h.receiver_pointerness,
891 file: plan.file_id,
892 })
893 .collect();
894
895 // Cluster D3: method-signature and function-signature hints. Only
896 // the NodeId field requires remap; `canonical_signature` is a plain
897 // `String` produced by `canonicalise_go_signature` and is identity
898 // across the commit boundary.
899 let method_signatures: Vec<GoMethodSignatureHint> = hints
900 .method_signatures
901 .iter()
902 .map(|h| GoMethodSignatureHint {
903 method_node: remap_node_id_hint(h.method_node, node_remap),
904 canonical_signature: h.canonical_signature.clone(),
905 file: plan.file_id,
906 })
907 .collect();
908
909 let function_signatures: Vec<GoFunctionSignatureHint> = hints
910 .function_signatures
911 .iter()
912 .map(|h| GoFunctionSignatureHint {
913 function_node: remap_node_id_hint(h.function_node, node_remap),
914 canonical_signature: h.canonical_signature.clone(),
915 file: plan.file_id,
916 })
917 .collect();
918
919 RemappedGoHints {
920 embeddings,
921 named_type_conversions,
922 receiver_calls,
923 method_receivers,
924 method_signatures,
925 function_signatures,
926 }
927}
928
929/// Write staged strings into pre-allocated interner slots.
930///
931/// Validates that each `InternString` op has a local `StringId` and that
932/// no duplicate local IDs exist (matching the serial `commit_strings` checks).
933///
934/// Returns `(remap, strings_written)`.
935fn write_strings(
936 ops: &[StagingOp],
937 plan: &FilePlan,
938 str_slots: &mut [Option<Arc<str>>],
939 rc_slots: &mut [u32],
940) -> (HashMap<StringId, StringId>, usize) {
941 let mut remap = HashMap::new();
942 let mut string_cursor = 0usize;
943
944 for op in ops {
945 if let StagingOp::InternString { local_id, value } = op {
946 // Validate: only local IDs are allowed in staging (matching serial commit_strings)
947 assert!(
948 local_id.is_local(),
949 "non-local StringId {:?} in InternString op for file {:?}",
950 local_id,
951 plan.file_id,
952 );
953 // Validate: no duplicate local IDs (matching serial commit_strings)
954 assert!(
955 !remap.contains_key(local_id),
956 "duplicate local StringId {:?} in InternString op for file {:?}",
957 local_id,
958 plan.file_id,
959 );
960
961 if string_cursor >= str_slots.len() {
962 log::warn!(
963 "string slot overflow in file {:?}: cursor={string_cursor}, slots={}, skipping remaining strings",
964 plan.file_id,
965 str_slots.len()
966 );
967 break;
968 }
969
970 // The global StringId for this string is the pre-allocated slot index.
971 #[allow(clippy::cast_possible_truncation)] // cursor is bounded by allocated slot count
972 let global_id = StringId::new(plan.string_range.start + string_cursor as u32);
973
974 // Write the string into the pre-allocated slot.
975 str_slots[string_cursor] = Some(Arc::from(value.as_str()));
976 rc_slots[string_cursor] = 1;
977
978 remap.insert(*local_id, global_id);
979 string_cursor += 1;
980 }
981 }
982
983 (remap, string_cursor)
984}
985
986/// Remap all `StringId` fields in a `NodeEntry` using a local→global table.
987///
988/// Required field (`name`) is always remapped if local.
989/// Optional fields (`signature`, `doc`, `qualified_name`, `visibility`)
990/// are remapped if present and local.
991fn remap_node_entry_string_ids(entry: &mut NodeEntry, remap: &HashMap<StringId, StringId>) {
992 remap_required_local(&mut entry.name, remap);
993 remap_option_local(&mut entry.signature, remap);
994 remap_option_local(&mut entry.doc, remap);
995 remap_option_local(&mut entry.qualified_name, remap);
996 remap_option_local(&mut entry.visibility, remap);
997}
998
999/// Remap all local `StringId` fields in an `EdgeKind`.
1000///
1001/// Uses the same exhaustive match as `remap_edge_kind_string_ids`, but
1002/// only remaps local IDs (those with `LOCAL_TAG_BIT` set).
1003#[allow(clippy::match_same_arms)]
1004fn remap_edge_kind_local_string_ids(kind: &mut EdgeKind, remap: &HashMap<StringId, StringId>) {
1005 match kind {
1006 EdgeKind::Imports { alias, .. } => remap_option_local(alias, remap),
1007 EdgeKind::Exports { alias, .. } => remap_option_local(alias, remap),
1008 EdgeKind::TypeOf { name, .. } => remap_option_local(name, remap),
1009 EdgeKind::TraitMethodBinding {
1010 trait_name,
1011 impl_type,
1012 ..
1013 } => {
1014 remap_required_local(trait_name, remap);
1015 remap_required_local(impl_type, remap);
1016 }
1017 EdgeKind::HttpRequest { url, .. } => remap_option_local(url, remap),
1018 EdgeKind::GrpcCall { service, method } => {
1019 remap_required_local(service, remap);
1020 remap_required_local(method, remap);
1021 }
1022 EdgeKind::DbQuery { table, .. } => remap_option_local(table, remap),
1023 EdgeKind::TableRead { table_name, schema } => {
1024 remap_required_local(table_name, remap);
1025 remap_option_local(schema, remap);
1026 }
1027 EdgeKind::TableWrite {
1028 table_name, schema, ..
1029 } => {
1030 remap_required_local(table_name, remap);
1031 remap_option_local(schema, remap);
1032 }
1033 EdgeKind::TriggeredBy {
1034 trigger_name,
1035 schema,
1036 } => {
1037 remap_required_local(trigger_name, remap);
1038 remap_option_local(schema, remap);
1039 }
1040 EdgeKind::MessageQueue { protocol, topic } => {
1041 if let MqProtocol::Other(s) = protocol {
1042 remap_required_local(s, remap);
1043 }
1044 remap_option_local(topic, remap);
1045 }
1046 EdgeKind::WebSocket { event } => remap_option_local(event, remap),
1047 EdgeKind::GraphQLOperation { operation } => remap_required_local(operation, remap),
1048 EdgeKind::ProcessExec { command } => remap_required_local(command, remap),
1049 EdgeKind::FileIpc { path_pattern } => remap_option_local(path_pattern, remap),
1050 EdgeKind::ProtocolCall { protocol, metadata } => {
1051 remap_required_local(protocol, remap);
1052 remap_option_local(metadata, remap);
1053 }
1054 // Variants without StringId fields — exhaustive, no wildcard.
1055 EdgeKind::Defines
1056 | EdgeKind::Contains
1057 | EdgeKind::Calls { .. }
1058 | EdgeKind::References
1059 | EdgeKind::Inherits
1060 | EdgeKind::Implements
1061 | EdgeKind::LifetimeConstraint { .. }
1062 | EdgeKind::MacroExpansion { .. }
1063 | EdgeKind::FfiCall { .. }
1064 | EdgeKind::WebAssemblyCall
1065 | EdgeKind::GenericBound
1066 | EdgeKind::AnnotatedWith
1067 | EdgeKind::AnnotationParam
1068 | EdgeKind::LambdaCaptures
1069 | EdgeKind::ModuleExports
1070 | EdgeKind::ModuleRequires
1071 | EdgeKind::ModuleOpens
1072 | EdgeKind::ModuleProvides
1073 | EdgeKind::TypeArgument
1074 | EdgeKind::ExtensionReceiver
1075 | EdgeKind::CompanionOf
1076 | EdgeKind::SealedPermit
1077 // T3 Wraps carries WrapKind (Copy) + Option<u16>; no StringId fields.
1078 | EdgeKind::Wraps { .. } => {}
1079 }
1080}
1081
1082/// Remap a required local `StringId` in place.
1083///
1084/// Panics if a local ID has no mapping, matching the serial
1085/// `apply_string_remap` behavior that returned `UnmappedLocalStringId`.
1086fn remap_required_local(id: &mut StringId, remap: &HashMap<StringId, StringId>) {
1087 if id.is_local() {
1088 let global = remap.get(id).unwrap_or_else(|| {
1089 panic!("unmapped local StringId {id:?} — missing intern_string op?")
1090 });
1091 *id = *global;
1092 }
1093}
1094
1095/// Remap an optional local `StringId` in place.
1096fn remap_option_local(opt: &mut Option<StringId>, remap: &HashMap<StringId, StringId>) {
1097 if let Some(id) = opt
1098 && id.is_local()
1099 {
1100 let global = remap.get(id).unwrap_or_else(|| {
1101 panic!("unmapped local StringId {id:?} — missing intern_string op?")
1102 });
1103 *id = *global;
1104 }
1105}
1106
1107/// Write staged nodes into pre-allocated arena slots.
1108///
1109/// Returns `(remap, nodes_written, node_ids)`. `node_ids` is the Vec of
1110/// every `NodeId` committed for this file, in commit order, for use by
1111/// the sequential bucket-population post-step.
1112fn write_nodes(
1113 ops: &[StagingOp],
1114 plan: &FilePlan,
1115 node_slots: &mut [Slot<NodeEntry>],
1116 string_remap: &HashMap<StringId, StringId>,
1117) -> (HashMap<NodeId, NodeId>, usize, Vec<NodeId>) {
1118 let mut node_remap = HashMap::new();
1119 let mut node_cursor = 0usize;
1120 let mut node_ids: Vec<NodeId> = Vec::with_capacity(node_slots.len());
1121
1122 for op in ops {
1123 if let StagingOp::AddNode {
1124 entry, expected_id, ..
1125 } = op
1126 {
1127 if node_cursor >= node_slots.len() {
1128 log::warn!(
1129 "node slot overflow in file {:?}: cursor={node_cursor}, slots={}, skipping remaining nodes",
1130 plan.file_id,
1131 node_slots.len()
1132 );
1133 break;
1134 }
1135
1136 let mut entry = entry.clone();
1137
1138 // Apply string remap to all StringId fields in the entry.
1139 remap_node_entry_string_ids(&mut entry, string_remap);
1140
1141 // Set the file ID from the plan.
1142 entry.file = plan.file_id;
1143
1144 // The actual NodeId is the pre-allocated slot index with generation 1.
1145 #[allow(clippy::cast_possible_truncation)] // cursor is bounded by allocated slot count
1146 let actual_index = plan.node_range.start + node_cursor as u32;
1147 let actual_id = NodeId::new(actual_index, 1);
1148
1149 // Write into the pre-allocated slot.
1150 node_slots[node_cursor] = Slot::new_occupied(1, entry);
1151
1152 if let Some(expected) = expected_id {
1153 node_remap.insert(*expected, actual_id);
1154 }
1155
1156 node_ids.push(actual_id);
1157 node_cursor += 1;
1158 }
1159 }
1160
1161 (node_remap, node_cursor, node_ids)
1162}
1163
1164/// Collect staged edges with remapped node IDs, string IDs, and pre-assigned
1165/// sequence numbers.
1166fn collect_edges(
1167 ops: &[StagingOp],
1168 plan: &FilePlan,
1169 node_remap: &HashMap<NodeId, NodeId>,
1170 string_remap: &HashMap<StringId, StringId>,
1171) -> Vec<PendingEdge> {
1172 let mut edges = Vec::new();
1173
1174 for op in ops {
1175 if let StagingOp::AddEdge {
1176 source,
1177 target,
1178 kind,
1179 spans,
1180 ..
1181 } = op
1182 {
1183 let actual_source = node_remap.get(source).copied().unwrap_or(*source);
1184 let actual_target = node_remap.get(target).copied().unwrap_or(*target);
1185
1186 // Clone and remap any local StringIds in the EdgeKind.
1187 let mut remapped_kind = kind.clone();
1188 remap_edge_kind_local_string_ids(&mut remapped_kind, string_remap);
1189
1190 edges.push(PendingEdge {
1191 source: actual_source,
1192 target: actual_target,
1193 kind: remapped_kind,
1194 file: plan.file_id,
1195 spans: spans.clone(),
1196 });
1197 }
1198 }
1199
1200 edges
1201}
1202
1203/// Remap a required `StringId` using the dedup remap table.
1204///
1205/// If the ID is in the remap table, it is replaced with the canonical ID.
1206/// Otherwise, it is left unchanged (identity mapping).
1207#[allow(clippy::implicit_hasher)]
1208pub fn remap_string_id(id: &mut StringId, remap: &HashMap<StringId, StringId>) {
1209 if let Some(&canonical) = remap.get(id) {
1210 *id = canonical;
1211 }
1212}
1213
1214/// Remap an optional `StringId` using the dedup remap table.
1215#[allow(clippy::implicit_hasher)]
1216pub fn remap_option_string_id(id: &mut Option<StringId>, remap: &HashMap<StringId, StringId>) {
1217 if let Some(inner) = id {
1218 remap_string_id(inner, remap);
1219 }
1220}
1221
1222/// Exhaustive remap of all `StringId` fields in an `EdgeKind`.
1223///
1224/// No wildcard arm — the compiler ensures completeness when new variants
1225/// are added to `EdgeKind`.
1226#[allow(clippy::match_same_arms, clippy::implicit_hasher)] // Arms are separated by category for documentation clarity
1227pub fn remap_edge_kind_string_ids(kind: &mut EdgeKind, remap: &HashMap<StringId, StringId>) {
1228 match kind {
1229 // === Variants WITH StringId fields ===
1230 EdgeKind::Imports { alias, .. } => remap_option_string_id(alias, remap),
1231 EdgeKind::Exports { alias, .. } => remap_option_string_id(alias, remap),
1232 EdgeKind::TypeOf { name, .. } => remap_option_string_id(name, remap),
1233 EdgeKind::TraitMethodBinding {
1234 trait_name,
1235 impl_type,
1236 ..
1237 } => {
1238 remap_string_id(trait_name, remap);
1239 remap_string_id(impl_type, remap);
1240 }
1241 EdgeKind::HttpRequest { url, .. } => remap_option_string_id(url, remap),
1242 EdgeKind::GrpcCall { service, method } => {
1243 remap_string_id(service, remap);
1244 remap_string_id(method, remap);
1245 }
1246 EdgeKind::DbQuery { table, .. } => remap_option_string_id(table, remap),
1247 EdgeKind::TableRead { table_name, schema } => {
1248 remap_string_id(table_name, remap);
1249 remap_option_string_id(schema, remap);
1250 }
1251 EdgeKind::TableWrite {
1252 table_name, schema, ..
1253 } => {
1254 remap_string_id(table_name, remap);
1255 remap_option_string_id(schema, remap);
1256 }
1257 EdgeKind::TriggeredBy {
1258 trigger_name,
1259 schema,
1260 } => {
1261 remap_string_id(trigger_name, remap);
1262 remap_option_string_id(schema, remap);
1263 }
1264 EdgeKind::MessageQueue { protocol, topic } => {
1265 if let MqProtocol::Other(s) = protocol {
1266 remap_string_id(s, remap);
1267 }
1268 remap_option_string_id(topic, remap);
1269 }
1270 EdgeKind::WebSocket { event } => remap_option_string_id(event, remap),
1271 EdgeKind::GraphQLOperation { operation } => remap_string_id(operation, remap),
1272 EdgeKind::ProcessExec { command } => remap_string_id(command, remap),
1273 EdgeKind::FileIpc { path_pattern } => remap_option_string_id(path_pattern, remap),
1274 EdgeKind::ProtocolCall { protocol, metadata } => {
1275 remap_string_id(protocol, remap);
1276 remap_option_string_id(metadata, remap);
1277 }
1278 // === Variants WITHOUT StringId fields — exhaustive, no wildcard ===
1279 EdgeKind::Defines
1280 | EdgeKind::Contains
1281 | EdgeKind::Calls { .. }
1282 | EdgeKind::References
1283 | EdgeKind::Inherits
1284 | EdgeKind::Implements
1285 | EdgeKind::LifetimeConstraint { .. }
1286 | EdgeKind::MacroExpansion { .. }
1287 | EdgeKind::FfiCall { .. }
1288 | EdgeKind::WebAssemblyCall
1289 | EdgeKind::GenericBound
1290 | EdgeKind::AnnotatedWith
1291 | EdgeKind::AnnotationParam
1292 | EdgeKind::LambdaCaptures
1293 | EdgeKind::ModuleExports
1294 | EdgeKind::ModuleRequires
1295 | EdgeKind::ModuleOpens
1296 | EdgeKind::ModuleProvides
1297 | EdgeKind::TypeArgument
1298 | EdgeKind::ExtensionReceiver
1299 | EdgeKind::CompanionOf
1300 | EdgeKind::SealedPermit
1301 // T3 Wraps carries WrapKind (Copy) + Option<u16>; no StringId fields.
1302 | EdgeKind::Wraps { .. } => {}
1303 }
1304}
1305
1306// === Phase 4: Post-chunk Finalization ===
1307
1308/// Apply global string dedup remap to all `StringId` fields in a `NodeEntry`.
1309///
1310/// This is the Phase 4 counterpart to `remap_node_entry_string_ids` (Phase 3).
1311/// Phase 3 remaps local→global; Phase 4 remaps duplicate global→canonical global.
1312#[allow(clippy::implicit_hasher)]
1313pub fn remap_node_entry_global(entry: &mut NodeEntry, remap: &HashMap<StringId, StringId>) {
1314 remap_string_id(&mut entry.name, remap);
1315 remap_option_string_id(&mut entry.signature, remap);
1316 remap_option_string_id(&mut entry.doc, remap);
1317 remap_option_string_id(&mut entry.qualified_name, remap);
1318 remap_option_string_id(&mut entry.visibility, remap);
1319}
1320
1321/// Apply global string dedup remap to all nodes in the arena and all pending edges.
1322///
1323/// This is Phase 4b of the parallel commit pipeline. After `build_dedup_table()`
1324/// produces a remap table, this function applies it to every `StringId` in:
1325/// - All `NodeEntry` fields in the arena
1326/// - All `EdgeKind` fields in the pending edges
1327#[allow(clippy::implicit_hasher)]
1328pub fn phase4_apply_global_remap(
1329 arena: &mut NodeArena,
1330 all_edges: &mut [Vec<PendingEdge>],
1331 remap: &HashMap<StringId, StringId>,
1332) {
1333 if remap.is_empty() {
1334 return;
1335 }
1336
1337 // Remap all nodes
1338 for (_id, entry) in arena.iter_mut() {
1339 remap_node_entry_global(entry, remap);
1340 }
1341
1342 // Remap all edges
1343 for file_edges in all_edges.iter_mut() {
1344 for edge in file_edges.iter_mut() {
1345 remap_edge_kind_string_ids(&mut edge.kind, remap);
1346 }
1347 }
1348}
1349
/// Statistics from Phase 4c-prime cross-file node unification.
///
/// Filled in by `phase4c_prime_unify_cross_file_nodes`; the per-field
/// notes below describe what that pass records.
#[derive(Debug, Default)]
pub struct UnificationStats {
    /// Number of qualified-name groups examined, i.e. groups of two or
    /// more call-compatible nodes sharing the same qualified name. Groups
    /// are keyed by qualified name only, not by `(qualified_name, kind)`.
    pub candidate_pairs_examined: usize,
    /// Number of loser nodes successfully merged into winners.
    pub nodes_merged: usize,
    /// Conservative upper bound on rewritten edges: the pass records the
    /// total number of pending edges walked while applying the remap, not
    /// the number whose endpoints actually changed. Stays 0 when nothing
    /// was merged.
    pub edges_rewritten: usize,
    /// Number of loser nodes (metadata merged into winners, slot kept inert).
    /// The pass increments this together with `nodes_merged`.
    pub nodes_inert: usize,
    /// Time spent in the unification pass (milliseconds).
    pub elapsed_ms: u64,
}
1364
1365/// Phase 4c-prime: Unify cross-file duplicate nodes sharing the same
1366/// canonical qualified name and a call-compatible kind.
1367///
1368/// Runs after `rebuild_indices` (Phase 4c) which populates `by_qualified_name`,
1369/// and before `pending_edges_to_delta` (Phase 4d) so the remap operates on
1370/// `PendingEdge` targets, not committed `DeltaEdge`s.
1371///
1372/// **Winner selection**: Among nodes sharing a qualified name and call-compatible
1373/// kinds, the node with `start_line > 0` wins. Tie-break in order:
1374/// 1. Wider `end_line - start_line` span.
1375/// 2. **Lexicographically smallest file path** (resolved via the rebuild
1376/// plane's [`FileRegistry`]). Phase 3e correctness requires the
1377/// path-based tie-break rather than the previous `FileId` comparison,
1378/// because `FileId` slot assignment differs between a fresh full
1379/// rebuild and an incremental rebuild — the incremental path clones
1380/// the existing `FileRegistry` and appends new paths, while the full
1381/// path assigns FileIds in filesystem-walk order from an empty
1382/// registry. Two builds of the same logical workspace therefore
1383/// disagree on which `FileId` is smaller when duplicate definitions
1384/// tie on span width, flipping the unification winner and stranding
1385/// `qualified_name` on the wrong side of the merge. Tie-breaking on
1386/// the (stable-across-builds) path makes winner selection
1387/// representation-independent.
1388/// 3. Final fallback: smaller `NodeId::index()` when paths also tie
1389/// (e.g. two definitions in the same file — rare but possible via
1390/// duplicate declarations). `NodeId` is deterministic within a
1391/// single build so this keeps the fallback stable for any individual
1392/// build even if it isn't invariant across representations.
1393///
1394/// **Safety**: Caller must hold an exclusive write lock on the graph.
1395pub(crate) fn phase4c_prime_unify_cross_file_nodes<
1396 G: crate::graph::unified::mutation_target::GraphMutationTarget,
1397>(
1398 graph: &mut G,
1399 all_edges: &mut [Vec<PendingEdge>],
1400) -> (UnificationStats, super::unification::NodeRemapTable) {
1401 use crate::graph::unified::mutation_target::GraphMutationTarget;
1402
1403 use super::helper::CALL_COMPATIBLE_KINDS;
1404 use super::unification::{NodeRemapTable, merge_node_into};
1405 use std::time::Instant;
1406
1407 let start = Instant::now();
1408 let mut stats = UnificationStats::default();
1409
1410 // Collect candidates: walk arena, group by qualified_name for nodes
1411 // with call-compatible kinds. Only groups of size >= 2 need unification.
1412 let mut qn_groups: HashMap<crate::graph::unified::string::StringId, Vec<NodeId>> =
1413 HashMap::new();
1414
1415 for (node_id, entry) in GraphMutationTarget::nodes(graph).iter() {
1416 if !CALL_COMPATIBLE_KINDS.contains(&entry.kind) {
1417 continue;
1418 }
1419 if let Some(qn_id) = entry.qualified_name {
1420 qn_groups.entry(qn_id).or_default().push(node_id);
1421 }
1422 }
1423
1424 // Filter to groups with 2+ members
1425 let groups_to_unify: Vec<Vec<NodeId>> = qn_groups
1426 .into_values()
1427 .filter(|group| {
1428 if group.len() >= 2 {
1429 stats.candidate_pairs_examined += 1;
1430 true
1431 } else {
1432 false
1433 }
1434 })
1435 .collect();
1436
1437 // Now perform merges
1438 let mut remap = NodeRemapTable::with_capacity(groups_to_unify.len());
1439
1440 // Pre-resolve every candidate node's canonical path-based tie-break
1441 // key into an owned `String` keyed by `NodeId`. Lifting the resolution
1442 // here instead of inside the `max_by` comparator avoids re-borrowing
1443 // `graph` immutably from a closure that lives across the
1444 // `merge_node_into(&mut graph, …)` call below. Without this
1445 // precomputation the borrow checker rejects the mutation loop because
1446 // the comparator closure captures the immutable borrow of `graph`
1447 // required by `path_key`.
1448 //
1449 // Path conversion goes through `Arc<Path>::to_string_lossy()` because
1450 // `Path` does not implement `Ord` lexicographically across platforms
1451 // consistently; forcing a canonical string form keeps the tie-break
1452 // deterministic on any host filesystem. When the registry can't
1453 // resolve a `FileId` (shouldn't happen in practice — every live
1454 // node's `FileId` was registered before the node was allocated) we
1455 // fall back to an empty string so the comparison still produces a
1456 // total order. Empty resolves tie-break each other stably (then fall
1457 // through to the `NodeId` index tie-break).
1458 let path_keys: HashMap<NodeId, String> = {
1459 let arena = GraphMutationTarget::nodes(graph);
1460 let files = GraphMutationTarget::files(graph);
1461 let mut out: HashMap<NodeId, String> =
1462 HashMap::with_capacity(groups_to_unify.iter().map(Vec::len).sum());
1463 for group in &groups_to_unify {
1464 for &nid in group {
1465 if out.contains_key(&nid) {
1466 continue;
1467 }
1468 let key = arena
1469 .get(nid)
1470 .and_then(|entry| files.resolve(entry.file))
1471 .map_or_else(String::new, |path| path.to_string_lossy().into_owned());
1472 out.insert(nid, key);
1473 }
1474 }
1475 out
1476 };
1477 let empty_path_key = String::new();
1478
1479 for group in &groups_to_unify {
1480 // Pick winner: prefer start_line > 0, tie-break by wider span,
1481 // then smaller path (stable across rebuild representations),
1482 // then smaller NodeId index.
1483 let winner_id = *group
1484 .iter()
1485 .max_by(|&&a, &&b| {
1486 let ea = GraphMutationTarget::nodes(graph).get(a);
1487 let eb = GraphMutationTarget::nodes(graph).get(b);
1488 match (ea, eb) {
1489 (Some(ea), Some(eb)) => {
1490 // Primary: prefer non-zero start_line
1491 let a_real = ea.start_line > 0;
1492 let b_real = eb.start_line > 0;
1493 match (a_real, b_real) {
1494 (true, false) => std::cmp::Ordering::Greater,
1495 (false, true) => std::cmp::Ordering::Less,
1496 _ => {
1497 // Tie-break 1: prefer wider span
1498 let a_range = ea.end_line.saturating_sub(ea.start_line);
1499 let b_range = eb.end_line.saturating_sub(eb.start_line);
1500 a_range
1501 .cmp(&b_range)
1502 .then_with(|| {
1503 // Tie-break 2: prefer smaller path
1504 // (reversed because `max_by` picks the
1505 // greater side — we want smaller path
1506 // to win, so invert the direct compare).
1507 let pa = path_keys.get(&a).unwrap_or(&empty_path_key);
1508 let pb = path_keys.get(&b).unwrap_or(&empty_path_key);
1509 pb.cmp(pa)
1510 })
1511 .then_with(|| {
1512 // Tie-break 3: smaller NodeId index
1513 // (stable within a single build;
1514 // deterministic fallback for co-located
1515 // duplicate definitions).
1516 b.index().cmp(&a.index())
1517 })
1518 }
1519 }
1520 }
1521 (Some(_), None) => std::cmp::Ordering::Greater,
1522 (None, Some(_)) => std::cmp::Ordering::Less,
1523 (None, None) => std::cmp::Ordering::Equal,
1524 }
1525 })
1526 .expect("group is non-empty");
1527
1528 // Merge all losers into winner
1529 for &node_id in group {
1530 if node_id == winner_id {
1531 continue;
1532 }
1533 match merge_node_into(GraphMutationTarget::nodes_mut(graph), node_id, winner_id) {
1534 Ok(()) => {
1535 remap.insert(node_id, winner_id);
1536 stats.nodes_merged += 1;
1537 stats.nodes_inert += 1;
1538 }
1539 Err(e) => {
1540 log::debug!("Phase 4c-prime: skipping merge ({e})");
1541 }
1542 }
1543 }
1544 }
1545
1546 // Apply remap table to all pending edges AND to every committed
1547 // edge already in the graph's edge store.
1548 //
1549 // The `apply_to_edges` call keeps PendingEdges (the output of this
1550 // chunk's parallel commit) pointing at canonical winners before
1551 // Phase 4d converts them into `DeltaEdge`s. On a full build that is
1552 // sufficient — no committed edges exist yet.
1553 //
1554 // The `apply_to_committed_edges` call closes the Phase 3e incremental
1555 // gap: the rebuild plane clones the pre-edit graph's committed edges
1556 // via `clone_for_rebuild`, so a newly-reparsed file whose definition
1557 // becomes the unification winner can leave surviving cross-file
1558 // edges pointing at what is now an inert loser slot. Retargeting the
1559 // committed edges through `remap` is the only way those edges
1560 // observe the canonical winner after finalize. On a full build the
1561 // second call is a no-op (edge store is empty).
1562 if !remap.is_empty() {
1563 let pre_count: usize = all_edges.iter().map(|v| v.len()).sum();
1564 remap.apply_to_edges(all_edges);
1565 remap.apply_to_committed_edges(GraphMutationTarget::edges(graph));
1566 stats.edges_rewritten = pre_count; // conservative: all edges walked
1567
1568 // Keep FileRegistry::per_file_nodes consistent with the arena.
1569 //
1570 // [`merge_node_into`] (see `unification.rs`) intentionally does
1571 // **not** vacate the loser slot — the slot stays `Occupied` but
1572 // inert so `NodeArena::slot_count()` (which CSR row_ptr sizing
1573 // depends on) is preserved. Because the slot is still live per
1574 // `NodeArena::iter()`, the §F.1 bucket bijection would panic
1575 // with "live node absent from all buckets" if we purged losers
1576 // from their home bucket.
1577 //
1578 // Therefore: losers stay in whichever per-file bucket Phase 3
1579 // first committed them to. That bucket's `FileId` matches the
1580 // loser's `NodeEntry.file`, so (c) passes. Each loser is in
1581 // exactly one bucket, so (b) passes. Every live arena slot is
1582 // accounted for by some bucket, so (d) passes. The §K master
1583 // matrix already admits this semantics — inert merged-losers
1584 // are semantically equivalent to any other live `NodeArena`
1585 // entry for bucket-membership purposes.
1586 //
1587 // Name-resolution containment (Gate 0d iter-1 blocker).
1588 //
1589 // `merge_node_into` now ALSO clears the loser's `name` and
1590 // `qualified_name` fields (to `StringId::INVALID` / `None`), and
1591 // `AuxiliaryIndices::build_from_arena` skips any arena entry
1592 // whose `name == StringId::INVALID` when rebuilding the name,
1593 // qualified-name, kind, and file buckets. The second
1594 // `rebuild_indices()` call in `build_unified_graph_inner`
1595 // (entrypoint.rs:571, right below this function) runs AFTER
1596 // unification, so the buckets surfaced by `indices.by_name` /
1597 // `by_qualified_name` / `by_kind` / `by_file` contain only
1598 // winners — every public name-resolution surface
1599 // (`resolution::exact_qualified_bucket`,
1600 // `graph::find_by_pattern`, etc.) is therefore free of
1601 // unified-away duplicates. The only publish-visible bucket that
1602 // still references losers is `FileRegistry::per_file_nodes`,
1603 // which preserves the §F.1 bucket bijection without surfacing
1604 // them through name resolution.
1605 //
1606 // Historical note: an earlier iteration of this pass called
1607 // `retain_nodes_in_buckets` to purge losers; that matched a
1608 // stale understanding where `merge_node_into` was expected to
1609 // vacate the slot. Gate 0d's bucket-bijection invariant
1610 // surfaced the mismatch (every full rebuild produced a live
1611 // slot with no bucket entry). The fix is to align with the
1612 // unification contract: inert slots remain in their home
1613 // bucket, but `AuxiliaryIndices` treats them as name-invisible.
1614 }
1615
1616 stats.elapsed_ms = start.elapsed().as_millis() as u64;
1617 // Return the remap alongside the stats so the new Phase 4d-prime
1618 // (`phase4d_prime_propagate_staging_metadata`, 02_DESIGN §4.3.e
1619 // Changes 2 + 4) can drop loser-keyed metadata before merging the
1620 // per-file staging stores into `CodeGraph::macro_metadata`. The
1621 // `apply_to_edges` / `apply_to_committed_edges` calls above have
1622 // already consumed `remap` for edge retargeting; the returned table
1623 // is the same authoritative map used downstream.
1624 (stats, remap)
1625}
1626
1627/// Rekey a per-file staging `NodeMetadataStore` from staging-local
1628/// `NodeId`s to canonical arena `NodeId`s using the per-file commit
1629/// order.
1630///
1631/// 02_DESIGN §4.3.e Change 1 assumes staging metadata reaches Phase
1632/// 4d-prime under the arena NodeIds Phase 3 assigned. In practice
1633/// `StagingGraph::add_node` returns `NodeId::new(i, 1)` where `i` is the
1634/// staging-local sequential index (see `staging.rs:355`), and plugins
1635/// key their `NodeMetadataStore` entries under those staging-local IDs
1636/// (see e.g. the Rust plugin's `metadata_store.get_or_insert_default(func_id)`
1637/// at `sqry-lang-rust/src/macro_boundaries/proc_macro_classify.rs:84`).
1638/// Phase 3 then renumbers those into arena slots; `per_file_node_ids[i]`
1639/// is the arena NodeId for staging `NodeId(i, 1)`.
1640///
1641/// This helper rekeys each metadata entry by index: an entry under
1642/// staging `NodeId(i, 1)` is moved to `per_file_node_ids[i]`. Entries
1643/// whose staging index is out of bounds or whose generation is not the
1644/// staging-canonical `1` are dropped (defensive — should never happen
1645/// under the documented `StagingGraph::add_node` contract).
1646///
1647/// Returns a fresh arena-keyed [`NodeMetadataStore`]. The input is moved
1648/// in by value so callers can `take_macro_metadata` and pipe the result
1649/// through this helper into the Phase 4d-prime accumulator.
1650#[must_use]
1651pub(crate) fn rekey_staging_metadata_to_arena(
1652 staging_metadata: crate::graph::unified::storage::metadata::NodeMetadataStore,
1653 per_file_node_ids: &[crate::graph::unified::node::id::NodeId],
1654) -> crate::graph::unified::storage::metadata::NodeMetadataStore {
1655 use crate::graph::unified::node::id::NodeId;
1656 use crate::graph::unified::storage::metadata::NodeMetadataStore;
1657
1658 let mut rekeyed = NodeMetadataStore::new();
1659 for ((index, generation), entry) in staging_metadata.iter_entries() {
1660 // Defensive: staging.add_node always emits generation 1. Drop
1661 // any entry that does not match that contract; it cannot
1662 // correspond to a Phase 3 commit slot.
1663 if generation != 1 {
1664 continue;
1665 }
1666 let idx_usize = index as usize;
1667 let Some(&arena_id) = per_file_node_ids.get(idx_usize) else {
1668 // Stale key beyond the file's committed range — drop silently.
1669 continue;
1670 };
1671 let _ = NodeId::new(index, generation); // documentation: this was the staging-local id
1672 // Re-insert the whole `StoredEntry` (typed payload + flags) so both
1673 // `cfg_condition`/macro metadata AND synthetic markers survive the
1674 // staging-to-arena rekey.
1675 rekeyed.insert_entry(arena_id, entry.clone());
1676 }
1677 rekeyed
1678}
1679
1680/// Phase 4d-prime — propagate per-file staging `NodeMetadataStore` into
1681/// the live graph's `macro_metadata` after Phase 4d (bulk edge insert)
1682/// and before Phase 4e (binding-plane derivation).
1683///
1684/// 02_DESIGN §4.3.e (Changes 4 + 7): the active Phase 3 commit path does
1685/// not read `staging.macro_metadata`, and `StagingGraph::take_macro_metadata`
1686/// was previously defined but never called — staging metadata never reached
1687/// `CodeGraph::macro_metadata`. T3.8's `cfg_condition` cannot ride the Go
1688/// plugin's parallel synthetic-flag channel (per-symbol metadata, not a
1689/// boolean bit on a known set of placeholders), so this sub-phase wires
1690/// the missing path.
1691///
1692/// For each `(file_id, store)` entry:
1693/// 1. Apply the Phase 4c-prime `NodeRemapTable` via
1694/// [`NodeRemapTable::apply_to_metadata_store`] so loser-keyed entries
1695/// are dropped (per 01_SPEC §5.3.f the spec contract is "losers'
1696/// constraints are lost"; the winner's own per-file store carries the
1697/// authoritative metadata).
1698/// 2. If the store still has entries after the remap, call
1699/// [`NodeMetadataStore::merge`] into the graph's authoritative metadata
1700/// store.
1701///
1702/// Returns `true` when at least one entry was merged, `false` when every
1703/// staged store was empty or fully consumed by loser-drops. The boolean
1704/// is observed by the Phase 3d post-Pass-4d hook on the incremental
1705/// rebuild plane; production callers ignore it.
1706///
1707/// Generic over [`GraphMutationTarget`] so both the full-build
1708/// (`build_unified_graph_inner`) and incremental
1709/// (`incremental_rebuild` → `phase3d_insert_cross_file_edges`) planes
1710/// can call it against `CodeGraph` and `RebuildGraph` respectively.
1711///
1712/// Runs after Phase 4d (NodeRemapTable produced by 4c is final) and
1713/// before Phase 4e (binding-plane synthesis can observe `cfg_condition`
1714/// if it later needs to). The Rust plugin's existing `merge_macro_metadata`
1715/// call automatically benefits: Rust-side `#[cfg(...)]` strings start
1716/// flowing into the live snapshot for the first time as an incidental
1717/// fix of an existing latent gap.
1718#[must_use]
1719pub(crate) fn phase4d_prime_propagate_staging_metadata<G>(
1720 graph: &mut G,
1721 staged_metadata: Vec<(
1722 crate::graph::unified::file::id::FileId,
1723 crate::graph::unified::storage::metadata::NodeMetadataStore,
1724 )>,
1725 remap: &super::unification::NodeRemapTable,
1726) -> bool
1727where
1728 G: crate::graph::unified::mutation_target::GraphMutationTarget,
1729{
1730 let target = graph.macro_metadata_mut();
1731 let mut any_inserted = false;
1732 for (_file_id, mut metadata) in staged_metadata {
1733 remap.apply_to_metadata_store(&mut metadata);
1734 if !metadata.is_empty() {
1735 target.merge(&metadata);
1736 any_inserted = true;
1737 }
1738 }
1739 any_inserted
1740}
1741
1742/// Convert per-file `PendingEdge` collections to per-file `DeltaEdge` collections
1743/// with monotonically increasing sequence numbers.
1744///
1745/// The sequence numbers are assigned file-by-file, edge-by-edge, starting from
1746/// `seq_start`. This produces the deterministic ordering required by
1747/// `BidirectionalEdgeStore::add_edges_bulk_ordered()`.
1748#[must_use]
1749pub fn pending_edges_to_delta(
1750 per_file_edges: &[Vec<PendingEdge>],
1751 seq_start: u64,
1752) -> (Vec<Vec<DeltaEdge>>, u64) {
1753 let mut seq = seq_start;
1754 let mut result = Vec::with_capacity(per_file_edges.len());
1755
1756 for file_edges in per_file_edges {
1757 let mut delta_vec = Vec::with_capacity(file_edges.len());
1758 for edge in file_edges {
1759 delta_vec.push(DeltaEdge::with_spans(
1760 edge.source,
1761 edge.target,
1762 edge.kind.clone(),
1763 seq,
1764 DeltaOp::Add,
1765 edge.file,
1766 edge.spans.clone(),
1767 ));
1768 seq += 1;
1769 }
1770 result.push(delta_vec);
1771 }
1772
1773 (result, seq)
1774}
1775
1776/// Rebuild the auxiliary indices on `graph` from its current node arena.
1777///
1778/// Generic counterpart to the inherent [`CodeGraph::rebuild_indices`].
1779/// Takes a [`GraphMutationTarget`] so both the full-build
1780/// (`build_unified_graph_inner`) and incremental-rebuild
1781/// (`incremental_rebuild` on `RebuildGraph`) pipelines can share the
1782/// same helper. The inherent method now delegates here so the
1783/// implementation lives in exactly one place.
1784///
1785/// Internally uses [`GraphMutationTarget::nodes_and_indices_mut`] to
1786/// acquire a disjoint `(&NodeArena, &mut AuxiliaryIndices)` pair, then
1787/// hands them to [`AuxiliaryIndices::build_from_arena`] which clears
1788/// the existing indices and rebuilds in a single pass without
1789/// per-element duplicate checking.
1790///
1791/// [`CodeGraph::rebuild_indices`]: crate::graph::unified::concurrent::CodeGraph::rebuild_indices
1792/// [`AuxiliaryIndices::build_from_arena`]: crate::graph::unified::storage::indices::AuxiliaryIndices::build_from_arena
1793pub(crate) fn rebuild_indices<G: crate::graph::unified::mutation_target::GraphMutationTarget>(
1794 graph: &mut G,
1795) {
1796 let (nodes, indices) = graph.nodes_and_indices_mut();
1797 indices.build_from_arena(nodes);
1798}
1799
1800/// Phase 4d — bulk-insert every pending edge into the graph via the
1801/// deterministic `DeltaEdge` conversion path.
1802///
1803/// Wraps the pure [`pending_edges_to_delta`] conversion + the
1804/// [`BidirectionalEdgeStore::add_edges_bulk_ordered`] call that
1805/// `build_unified_graph_inner` ran inline between Phase 4c-prime and
1806/// Phase 4e. The wrapper is generic over [`GraphMutationTarget`] so
1807/// the Task 4 Step 4 Phase 3 `incremental_rebuild` body can call it
1808/// against a [`RebuildGraph`] without duplicating the seq-counter +
1809/// flatten logic.
1810///
1811/// Returns the final edge sequence counter (for callers that need to
1812/// continue allocating deterministic sequence numbers downstream).
1813/// The counter flows from
1814/// [`BidirectionalEdgeStore::forward().seq_counter()`] on the way in
1815/// and advances by one per inserted edge.
1816///
1817/// # Semantics
1818///
1819/// * `per_file_edges` is consumed by-reference; the function does not
1820/// mutate the caller's buffer. Callers who no longer need the
1821/// vectors may drop them after the call.
1822/// * If `per_file_edges` is empty (or every inner vector is empty),
1823/// the edge store is left untouched.
1824/// * The helper does not `bump_epoch()` on the graph — Phase 4d is
1825/// edge-level only; the full pipeline bumps epoch separately.
1826///
1827/// # Edge-source-identity invariant (`C_EDGE_MIGRATE`)
1828///
1829/// Phase 4d does NOT dedup edges by `(source, target, kind)`. Every
1830/// `PendingEdge` from every file becomes one `DeltaEdge` with a unique
1831/// monotonically increasing `seq` number; the
1832/// [`BidirectionalEdgeStore::add_edges_bulk_ordered`] insertion contract
1833/// preserves that 1:1 mapping. This is what lets the Cluster C
1834/// `C_EDGE_MIGRATE` DAG unit (2026-04-29 BadLiveware Go batch) move the
1835/// `TypeOf{Field}` edge source from the struct node to the per-field
1836/// `Property` node without touching this helper: the new
1837/// Property-sourced edge addresses a distinct `(source, target)` pair
1838/// from the legacy struct-sourced edge, and Phase 4d emits both shapes
1839/// with no collapsing. Plugins that only emit the new shape (Go after
1840/// `C_EDGE_MIGRATE`) therefore produce a clean Property-sourced
1841/// `TypeOf{Field}` edge set with no struct-sourced shadows. Plugins
1842/// outside Cluster C's scope (`C_OTHER_PLUGINS`) keep emitting the
1843/// legacy shape until they migrate; the bulk-insert path treats both
1844/// shapes identically.
1845///
1846/// Determinism: per-file `PendingEdge` order is fixed by the parser
1847/// pass, and `pending_edges_to_delta` walks the per-file vectors in
1848/// the input order. So `phase4d_bulk_insert_edges` produces a
1849/// byte-identical `DeltaEdge` sequence on every fresh rebuild of the
1850/// same source tree, which is what guarantees the
1851/// `SnapshotReader → SnapshotWriter` round-trip identity required by
1852/// the `C_EDGE_MIGRATE` acceptance criteria.
1853///
1854/// [`BidirectionalEdgeStore::add_edges_bulk_ordered`]: crate::graph::unified::edge::bidirectional::BidirectionalEdgeStore::add_edges_bulk_ordered
1855/// [`RebuildGraph`]: crate::graph::unified::rebuild::rebuild_graph::RebuildGraph
1856pub(crate) fn phase4d_bulk_insert_edges<
1857 G: crate::graph::unified::mutation_target::GraphMutationTarget,
1858>(
1859 graph: &mut G,
1860 per_file_edges: &[Vec<PendingEdge>],
1861) -> u64 {
1862 // Start seq numbering from the edge store's current counter to
1863 // support non-empty graphs (incremental rebuild carries forward
1864 // the prior build's counter).
1865 let edge_seq_start = graph.edges().forward().seq_counter();
1866 let (delta_edge_vecs, final_seq) = pending_edges_to_delta(per_file_edges, edge_seq_start);
1867 let total_edge_count: u64 = delta_edge_vecs.iter().map(|v| v.len() as u64).sum();
1868 if total_edge_count > 0 {
1869 graph
1870 .edges_mut()
1871 .add_edges_bulk_ordered(&delta_edge_vecs, total_edge_count);
1872 }
1873 final_seq
1874}
1875
1876#[cfg(test)]
1877mod tests {
1878 use super::*;
1879
1880 #[test]
1881 fn test_compute_commit_plan_basic() {
1882 let file_ids = vec![FileId::new(0), FileId::new(1), FileId::new(2)];
1883 let node_counts = vec![3, 0, 5];
1884 let string_counts = vec![2, 1, 3];
1885 let edge_counts = vec![4, 0, 6];
1886
1887 let plan = compute_commit_plan(
1888 &node_counts,
1889 &string_counts,
1890 &edge_counts,
1891 &file_ids,
1892 0,
1893 1, // string_offset=1 for sentinel
1894 );
1895
1896 assert_eq!(plan.total_nodes, 8);
1897 assert_eq!(plan.total_strings, 6);
1898 assert_eq!(plan.total_edges, 10);
1899
1900 // File 0: nodes [0..3), strings [1..3)
1901 assert_eq!(plan.file_plans[0].node_range, 0..3);
1902 assert_eq!(plan.file_plans[0].string_range, 1..3);
1903
1904 // File 1: nodes [3..3), strings [3..4) — empty nodes
1905 assert_eq!(plan.file_plans[1].node_range, 3..3);
1906 assert_eq!(plan.file_plans[1].string_range, 3..4);
1907
1908 // File 2: nodes [3..8), strings [4..7)
1909 assert_eq!(plan.file_plans[2].node_range, 3..8);
1910 assert_eq!(plan.file_plans[2].string_range, 4..7);
1911 }
1912
1913 #[test]
1914 fn test_compute_commit_plan_with_offsets() {
1915 let file_ids = vec![FileId::new(5)];
1916 let plan = compute_commit_plan(&[10], &[5], &[7], &file_ids, 100, 50);
1917 assert_eq!(plan.file_plans[0].node_range, 100..110);
1918 assert_eq!(plan.file_plans[0].string_range, 50..55);
1919 assert_eq!(plan.total_nodes, 10);
1920 assert_eq!(plan.total_strings, 5);
1921 assert_eq!(plan.total_edges, 7);
1922 }
1923
1924 #[test]
1925 fn test_compute_commit_plan_empty() {
1926 let plan = compute_commit_plan(&[], &[], &[], &[], 0, 1);
1927 assert_eq!(plan.total_nodes, 0);
1928 assert_eq!(plan.total_strings, 0);
1929 assert_eq!(plan.total_edges, 0);
1930 assert!(plan.file_plans.is_empty());
1931 }
1932
1933 #[test]
1934 fn test_remap_string_id_basic() {
1935 let mut remap = HashMap::new();
1936 remap.insert(StringId::new(1), StringId::new(100));
1937
1938 let mut id = StringId::new(1);
1939 remap_string_id(&mut id, &remap);
1940 assert_eq!(id, StringId::new(100));
1941 }
1942
1943 #[test]
1944 fn test_remap_string_id_not_in_remap() {
1945 let remap = HashMap::new();
1946 let mut id = StringId::new(42);
1947 remap_string_id(&mut id, &remap);
1948 assert_eq!(id, StringId::new(42)); // unchanged
1949 }
1950
1951 #[test]
1952 fn test_remap_option_string_id() {
1953 let mut remap = HashMap::new();
1954 remap.insert(StringId::new(5), StringId::new(50));
1955
1956 let mut some_id = Some(StringId::new(5));
1957 remap_option_string_id(&mut some_id, &remap);
1958 assert_eq!(some_id, Some(StringId::new(50)));
1959
1960 let mut none_id: Option<StringId> = None;
1961 remap_option_string_id(&mut none_id, &remap);
1962 assert_eq!(none_id, None);
1963 }
1964
1965 #[test]
1966 fn test_remap_edge_kind_imports() {
1967 let mut remap = HashMap::new();
1968 remap.insert(StringId::new(1), StringId::new(100));
1969
1970 let mut kind = EdgeKind::Imports {
1971 alias: Some(StringId::new(1)),
1972 is_wildcard: false,
1973 };
1974 remap_edge_kind_string_ids(&mut kind, &remap);
1975 assert!(
1976 matches!(kind, EdgeKind::Imports { alias: Some(id), .. } if id == StringId::new(100))
1977 );
1978 }
1979
1980 #[test]
1981 fn test_remap_edge_kind_trait_method_binding() {
1982 let mut remap = HashMap::new();
1983 remap.insert(StringId::new(1), StringId::new(100));
1984 remap.insert(StringId::new(2), StringId::new(200));
1985
1986 let mut kind = EdgeKind::TraitMethodBinding {
1987 trait_name: StringId::new(1),
1988 impl_type: StringId::new(2),
1989 is_ambiguous: false,
1990 };
1991 remap_edge_kind_string_ids(&mut kind, &remap);
1992 assert!(
1993 matches!(kind, EdgeKind::TraitMethodBinding { trait_name, impl_type, .. }
1994 if trait_name == StringId::new(100) && impl_type == StringId::new(200))
1995 );
1996 }
1997
1998 #[test]
1999 fn test_remap_edge_kind_no_op_variants() {
2000 let remap = HashMap::new();
2001
2002 // Defines — no StringId fields
2003 let mut kind = EdgeKind::Defines;
2004 remap_edge_kind_string_ids(&mut kind, &remap);
2005 assert!(matches!(kind, EdgeKind::Defines));
2006
2007 // Calls — no StringId fields
2008 let mut kind = EdgeKind::Calls {
2009 argument_count: 3,
2010 is_async: true,
2011 resolved_via: ResolvedVia::Direct,
2012 };
2013 remap_edge_kind_string_ids(&mut kind, &remap);
2014 assert!(matches!(
2015 kind,
2016 EdgeKind::Calls {
2017 argument_count: 3,
2018 is_async: true,
2019 resolved_via: ResolvedVia::Direct,
2020 }
2021 ));
2022 }
2023
2024 fn placeholder_entry() -> NodeEntry {
2025 use crate::graph::unified::node::NodeKind;
2026 NodeEntry::new(NodeKind::Function, StringId::new(0), FileId::new(0))
2027 }
2028
2029 #[test]
2030 fn test_phase2_assign_ranges_basic() {
2031 use super::super::staging::StagingGraph;
2032
2033 // Create 2 staging graphs with known counts
2034 let mut sg0 = StagingGraph::new();
2035 let mut sg1 = StagingGraph::new();
2036
2037 // sg0: 2 nodes, 1 string, 1 edge
2038 let entry0 = placeholder_entry();
2039 let n0 = sg0.add_node(entry0.clone());
2040 let n1 = sg0.add_node(entry0.clone());
2041 sg0.intern_string(StringId::new_local(0), "hello".into());
2042 sg0.add_edge(
2043 n0,
2044 n1,
2045 EdgeKind::Calls {
2046 argument_count: 0,
2047 is_async: false,
2048 resolved_via: ResolvedVia::Direct,
2049 },
2050 FileId::new(0),
2051 );
2052
2053 // sg1: 1 node, 2 strings, 0 edges
2054 sg1.add_node(entry0);
2055 sg1.intern_string(StringId::new_local(0), "world".into());
2056 sg1.intern_string(StringId::new_local(1), "foo".into());
2057
2058 let file_ids = vec![FileId::new(10), FileId::new(11)];
2059 let offsets = GlobalOffsets {
2060 node_offset: 5,
2061 string_offset: 3,
2062 };
2063
2064 let plan = phase2_assign_ranges(&[&sg0, &sg1], &file_ids, &offsets);
2065
2066 // sg0: 2 nodes, 1 string, 1 edge
2067 assert_eq!(plan.file_plans[0].node_range, 5..7);
2068 assert_eq!(plan.file_plans[0].string_range, 3..4);
2069
2070 // sg1: 1 node, 2 strings, 0 edges
2071 assert_eq!(plan.file_plans[1].node_range, 7..8);
2072 assert_eq!(plan.file_plans[1].string_range, 4..6);
2073
2074 assert_eq!(plan.total_nodes, 3);
2075 assert_eq!(plan.total_strings, 3);
2076 assert_eq!(plan.total_edges, 1);
2077 }
2078
    /// End-to-end Phase 2 → Phase 3 commit of a single staging graph into
    /// a `CodeGraph`, with the file's node range starting at slot 10.
    ///
    /// Checks the written counts, that the staged string resolves through
    /// the graph's interner, that the edge's endpoints and file id come
    /// back as global values, and that the per-file node id list is
    /// reported in commit order.
    #[test]
    fn test_phase3_parallel_commit_basic() {
        use super::super::staging::StagingGraph;
        use crate::graph::unified::concurrent::CodeGraph;
        use crate::graph::unified::node::NodeKind;
        // The `nodes_mut` / `strings_mut` method calls below resolve
        // to inherent methods on `CodeGraph`; the `GraphMutationTarget`
        // trait impl provides the same surface for `RebuildGraph`
        // (see `phase3_parallel_commit_runs_against_rebuild_graph`).
        // No trait import is needed here because inherent-method
        // resolution wins for `CodeGraph`.

        // Create a staging graph with 2 nodes, 1 string, 1 edge.
        // Both nodes share the same staging-local name id.
        let mut sg = StagingGraph::new();
        let local_name = StringId::new_local(0);
        sg.intern_string(local_name, "my_func".into());

        let entry = NodeEntry::new(NodeKind::Function, local_name, FileId::new(0));
        let n0 = sg.add_node(entry.clone());

        let entry2 = NodeEntry::new(NodeKind::Variable, local_name, FileId::new(0));
        let n1 = sg.add_node(entry2);

        // The edge is staged with FileId 0; the assertions at the end
        // expect the committed edge to carry the plan's FileId 5 instead.
        sg.add_edge(
            n0,
            n1,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
                resolved_via: ResolvedVia::Direct,
            },
            FileId::new(0),
        );

        let file_ids = vec![FileId::new(5)];

        // Pre-allocate with non-zero offsets to verify remap works,
        // against a full `CodeGraph` so the new generic signature is
        // exercised end-to-end via `GraphMutationTarget`.
        let mut graph = CodeGraph::new();
        graph
            .nodes_mut()
            .alloc_range(10, &placeholder_entry())
            .unwrap();
        let string_start = graph.strings_mut().alloc_range(1).unwrap();
        assert_eq!(string_start, 1); // past sentinel

        let offsets = GlobalOffsets {
            node_offset: 10, // file's nodes start at index 10
            string_offset: string_start,
        };
        let plan = phase2_assign_ranges(&[&sg], &file_ids, &offsets);
        assert_eq!(plan.file_plans[0].node_range, 10..12);

        // Pre-allocate the actual ranges for Phase 3 (sized from the
        // plan's totals, on top of the leading slots allocated above).
        graph
            .nodes_mut()
            .alloc_range(plan.total_nodes, &placeholder_entry())
            .unwrap();
        graph.strings_mut().alloc_range(plan.total_strings).unwrap();

        // Phase 3 — generic over `G: GraphMutationTarget`. Passing
        // `&mut graph` infers `G = CodeGraph`.
        let result = phase3_parallel_commit(&plan, &[&sg], &mut graph);

        // Verify written counts
        assert_eq!(result.total_nodes_written, 2);
        assert_eq!(result.total_strings_written, 1);

        // Verify strings were written: the staged string must resolve
        // under the global id at the string offset.
        let global_name = StringId::new(string_start);
        assert_eq!(&*graph.strings().resolve(global_name).unwrap(), "my_func");

        // Verify 1 file, 1 edge
        assert_eq!(result.per_file_edges.len(), 1);
        assert_eq!(result.per_file_edges[0].len(), 1);

        // Verify edge was remapped to global IDs (node_offset=10)
        let edge = &result.per_file_edges[0][0];
        assert_eq!(edge.file, FileId::new(5));
        assert_eq!(edge.source, NodeId::new(10, 1)); // first node at slot 10
        assert_eq!(edge.target, NodeId::new(11, 1)); // second node at slot 11

        // Gate 0c (iter-2 B2): per-file node IDs must be recorded in
        // commit order, one Vec per FilePlan, so the caller can
        // populate FileRegistry::per_file_nodes deterministically.
        assert_eq!(result.per_file_node_ids.len(), 1);
        assert_eq!(
            result.per_file_node_ids[0],
            vec![NodeId::new(10, 1), NodeId::new(11, 1)]
        );
    }
2171
2172 #[test]
2173 fn test_phase3_parallel_commit_empty() {
2174 use crate::graph::unified::concurrent::CodeGraph;
2175
2176 let mut graph = CodeGraph::new();
2177
2178 let plan = ChunkCommitPlan {
2179 file_plans: vec![],
2180 total_nodes: 0,
2181 total_strings: 0,
2182 total_edges: 0,
2183 };
2184
2185 let result = phase3_parallel_commit(&plan, &[], &mut graph);
2186 assert!(result.per_file_edges.is_empty());
2187 assert!(result.per_file_node_ids.is_empty());
2188 assert_eq!(result.total_nodes_written, 0);
2189 assert_eq!(result.total_strings_written, 0);
2190 }
2191
    /// Task 4 Step 4 Phase 1 — exercise the `GraphMutationTarget`
    /// trait's second implementor.
    ///
    /// Builds a tiny staging graph, hosts it in a fresh `RebuildGraph`,
    /// and asserts the committed nodes land in the **rebuild-local**
    /// arena — not in a `CodeGraph`. The test also confirms the
    /// per-file edges / node-id vectors the helper returns agree with
    /// the `CodeGraph` call-path result shape.
    ///
    /// If a future refactor accidentally routed Phase 3 back to a
    /// `CodeGraph` (e.g. through a hidden static `Arc::make_mut`), this
    /// test would observe an empty rebuild arena and fail.
    ///
    /// Only compiled when the `rebuild-internals` feature is enabled.
    #[test]
    #[cfg(feature = "rebuild-internals")]
    fn phase3_parallel_commit_runs_against_rebuild_graph() {
        use super::super::staging::StagingGraph;
        use crate::graph::unified::concurrent::CodeGraph;
        use crate::graph::unified::mutation_target::GraphMutationTarget;
        use crate::graph::unified::node::NodeKind;

        // Staging graph: 2 nodes + 1 string + 1 Calls edge (identical
        // shape to the CodeGraph test above, so any behavioural drift
        // between the two paths surfaces as different assertions).
        let mut sg = StagingGraph::new();
        let local_name = StringId::new_local(0);
        sg.intern_string(local_name, "rebuild_target".into());
        let entry = NodeEntry::new(NodeKind::Function, local_name, FileId::new(0));
        let n0 = sg.add_node(entry.clone());
        let entry2 = NodeEntry::new(NodeKind::Variable, local_name, FileId::new(0));
        let n1 = sg.add_node(entry2);
        sg.add_edge(
            n0,
            n1,
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
                resolved_via: ResolvedVia::Direct,
            },
            FileId::new(0),
        );

        // Produce a RebuildGraph from an empty CodeGraph; drop the
        // CodeGraph immediately so any subsequent mutation observed in
        // the rebuild cannot possibly be leaking back to a shared Arc.
        let mut rebuild = {
            let graph = CodeGraph::new();
            graph.clone_for_rebuild()
        };

        // Pre-allocate leading slots on the rebuild-local arena +
        // interner so the file's ranges begin at a non-zero offset —
        // this is the same pattern the CodeGraph test uses, verifying
        // the trait's disjoint-borrow combinator threads through
        // identically.
        rebuild
            .nodes_mut()
            .alloc_range(10, &placeholder_entry())
            .unwrap();
        let string_start = rebuild.strings_mut().alloc_range(1).unwrap();
        assert_eq!(string_start, 1);

        // Plan a single file (FileId 5) whose nodes start at slot 10.
        let file_ids = vec![FileId::new(5)];
        let offsets = GlobalOffsets {
            node_offset: 10,
            string_offset: string_start,
        };
        let plan = phase2_assign_ranges(&[&sg], &file_ids, &offsets);

        // Reserve the node/string ranges the plan asks for before
        // running the commit.
        rebuild
            .nodes_mut()
            .alloc_range(plan.total_nodes, &placeholder_entry())
            .unwrap();
        rebuild
            .strings_mut()
            .alloc_range(plan.total_strings)
            .unwrap();

        // Phase 3 against the RebuildGraph. Inferred `G = RebuildGraph`.
        let result = phase3_parallel_commit(&plan, &[&sg], &mut rebuild);

        // === Invariant: the written data lives in the rebuild-local
        // arena, not in any CodeGraph field. ===
        //
        // Two slot ranges exist on the rebuild's arena now:
        //   * slots 0..10 = pre-fill placeholders (each `Function` /
        //     `StringId::new(0)` — note every alloc_range writes a
        //     clone of the template entry).
        //   * slots 10..12 = the two committed nodes from `sg`.
        //
        // Fetch the two committed NodeIds and resolve their names
        // through the rebuild-local interner; the string must match
        // the staged value "rebuild_target", proving the commit ran
        // on the rebuild's own fields.
        let committed_ids = &result.per_file_node_ids[0];
        assert_eq!(
            committed_ids,
            &vec![NodeId::new(10, 1), NodeId::new(11, 1)],
            "Phase 3 must commit into slots 10..12 on the rebuild-local arena"
        );

        let resolved_name = rebuild
            .nodes_mut()
            .get(NodeId::new(10, 1))
            .map(|entry| entry.name)
            .expect("committed node must exist in rebuild arena");
        // The name StringId on the committed node is a global ID
        // (Phase 3 remaps local → global); resolving it through the
        // rebuild-local interner must produce the staged value.
        let resolved_str = rebuild
            .strings_mut()
            .resolve(resolved_name)
            .expect("name must resolve in rebuild-local interner");
        assert_eq!(&*resolved_str, "rebuild_target");

        // === Shape invariants match the CodeGraph path ===
        assert_eq!(result.total_nodes_written, 2);
        assert_eq!(result.total_strings_written, 1);
        assert_eq!(result.per_file_edges.len(), 1);
        assert_eq!(result.per_file_edges[0].len(), 1);
        let edge = &result.per_file_edges[0][0];
        assert_eq!(edge.file, FileId::new(5));
        assert_eq!(edge.source, NodeId::new(10, 1));
        assert_eq!(edge.target, NodeId::new(11, 1));
    }
2316
2317 #[test]
2318 fn test_commit_single_file_string_remap() {
2319 use super::super::staging::StagingGraph;
2320 use crate::graph::unified::node::NodeKind;
2321
2322 let mut sg = StagingGraph::new();
2323 let local_0 = StringId::new_local(0);
2324 let local_1 = StringId::new_local(1);
2325 sg.intern_string(local_0, "alpha".into());
2326 sg.intern_string(local_1, "beta".into());
2327
2328 let mut entry = NodeEntry::new(NodeKind::Function, local_0, FileId::new(0));
2329 entry.signature = Some(local_1);
2330 sg.add_node(entry);
2331
2332 let plan = FilePlan {
2333 parsed_index: 0,
2334 file_id: FileId::new(42),
2335 node_range: 10..11,
2336 string_range: 20..22,
2337 };
2338
2339 let mut node_slots = vec![Slot::new_occupied(1, placeholder_entry())];
2340 let mut str_slots: Vec<Option<Arc<str>>> = vec![None, None];
2341 let mut rc_slots: Vec<u32> = vec![0, 0];
2342
2343 let result = commit_single_file(&sg, &plan, &mut node_slots, &mut str_slots, &mut rc_slots);
2344
2345 // Strings written
2346 assert_eq!(str_slots[0].as_deref(), Some("alpha"));
2347 assert_eq!(str_slots[1].as_deref(), Some("beta"));
2348 assert_eq!(rc_slots[0], 1);
2349 assert_eq!(rc_slots[1], 1);
2350 assert_eq!(result.strings_written, 2);
2351
2352 // Node entry has remapped StringIds
2353 if let crate::graph::unified::storage::SlotState::Occupied(entry) = node_slots[0].state() {
2354 assert_eq!(entry.name, StringId::new(20)); // global slot 20
2355 assert_eq!(entry.signature, Some(StringId::new(21))); // global slot 21
2356 assert_eq!(entry.file, FileId::new(42));
2357 } else {
2358 panic!("Expected occupied slot");
2359 }
2360 assert_eq!(result.nodes_written, 1);
2361
2362 // Per-file node IDs are recorded in commit order (Gate 0c bucket contract).
2363 assert_eq!(result.node_ids, vec![NodeId::new(10, 1)]);
2364
2365 // No edges
2366 assert!(result.edges.is_empty());
2367 }
2368
2369 #[test]
2370 fn test_remap_edge_kind_message_queue_other() {
2371 let mut remap = HashMap::new();
2372 remap.insert(StringId::new(10), StringId::new(110));
2373 remap.insert(StringId::new(20), StringId::new(220));
2374
2375 let mut kind = EdgeKind::MessageQueue {
2376 protocol: MqProtocol::Other(StringId::new(10)),
2377 topic: Some(StringId::new(20)),
2378 };
2379 remap_edge_kind_string_ids(&mut kind, &remap);
2380 assert!(matches!(
2381 kind,
2382 EdgeKind::MessageQueue {
2383 protocol: MqProtocol::Other(proto),
2384 topic: Some(topic),
2385 } if proto == StringId::new(110) && topic == StringId::new(220)
2386 ));
2387 }
2388
2389 // === Phase 4 tests ===
2390
/// `phase4_apply_global_remap` must rewrite duplicate string ids on both
/// arena node entries (name and signature) and on pending edge kinds.
#[test]
fn test_phase4_apply_global_remap_basic() {
    use crate::graph::unified::node::NodeKind;
    use crate::graph::unified::storage::NodeArena;

    // Ids 2 and 3 are duplicates that the remap collapses onto id 1.
    let canonical = StringId::new(1);
    let dup_name = StringId::new(2);
    let dup_signature = StringId::new(3);

    let mut arena = NodeArena::new();
    arena
        .alloc(NodeEntry::new(
            NodeKind::Function,
            canonical,
            FileId::new(0),
        ))
        .unwrap();

    let mut variable = NodeEntry::new(NodeKind::Variable, dup_name, FileId::new(0));
    variable.signature = Some(dup_signature);
    arena.alloc(variable).unwrap();

    // One import edge whose alias still carries a duplicate id.
    let import_edge = PendingEdge {
        source: NodeId::new(0, 1),
        target: NodeId::new(1, 1),
        kind: EdgeKind::Imports {
            alias: Some(dup_signature),
            is_wildcard: false,
        },
        file: FileId::new(0),
        spans: vec![],
    };
    let mut all_edges = vec![vec![import_edge]];

    // Dedup remap: 2→1, 3→1
    let remap = HashMap::from([(dup_name, canonical), (dup_signature, canonical)]);

    phase4_apply_global_remap(&mut arena, &mut all_edges, &remap);

    // The second arena entry had name 2 and signature 3; both must now be 1.
    let (_, second) = arena.iter().nth(1).unwrap();
    assert_eq!(second.name, StringId::new(1));
    assert_eq!(second.signature, Some(StringId::new(1)));

    // The edge alias must have followed the same 3→1 mapping.
    let EdgeKind::Imports { alias, .. } = &all_edges[0][0].kind else {
        panic!("Expected Imports edge");
    };
    assert_eq!(*alias, Some(StringId::new(1)));
}
2437
/// Smoke test: an empty arena, no edges and an empty remap table must be
/// accepted by `phase4_apply_global_remap` without panicking.
#[test]
fn test_phase4_apply_global_remap_empty() {
    use crate::graph::unified::storage::NodeArena;

    let remap = HashMap::new();
    let mut no_edges: Vec<Vec<PendingEdge>> = Vec::new();
    let mut empty_arena = NodeArena::new();

    // Nothing to rewrite; the call only has to return.
    phase4_apply_global_remap(&mut empty_arena, &mut no_edges, &remap);
}
2449
/// `pending_edges_to_delta` must keep the per-file grouping, hand out
/// consecutive sequence numbers starting at `seq_start` across files, and
/// emit every edge as a `DeltaOp::Add`.
#[test]
fn test_pending_edges_to_delta_basic() {
    // Small constructor so the fixture below only spells out what differs.
    let edge = |source: NodeId, target: NodeId, kind: EdgeKind, file: FileId| PendingEdge {
        source,
        target,
        kind,
        file,
        spans: vec![],
    };

    let edges = vec![
        // File 0: two edges.
        vec![
            edge(
                NodeId::new(0, 1),
                NodeId::new(1, 1),
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                    resolved_via: ResolvedVia::Direct,
                },
                FileId::new(0),
            ),
            edge(
                NodeId::new(1, 1),
                NodeId::new(2, 1),
                EdgeKind::References,
                FileId::new(0),
            ),
        ],
        // File 1: one edge.
        vec![edge(
            NodeId::new(3, 1),
            NodeId::new(4, 1),
            EdgeKind::Defines,
            FileId::new(1),
        )],
    ];

    let (deltas, final_seq) = pending_edges_to_delta(&edges, 100);

    // Grouping is preserved and the returned counter sits one past the
    // last sequence number handed out.
    assert_eq!(deltas.len(), 2);
    assert_eq!(deltas[0].len(), 2);
    assert_eq!(deltas[1].len(), 1);
    assert_eq!(final_seq, 103);

    // Every delta (not just a sample) must carry the next sequence number
    // and be an Add operation.
    for (file_idx, edge_idx, expected_seq) in
        [(0usize, 0usize, 100), (0, 1, 101), (1, 0, 102)]
    {
        assert_eq!(deltas[file_idx][edge_idx].seq, expected_seq);
        assert!(
            matches!(deltas[file_idx][edge_idx].op, DeltaOp::Add),
            "delta [{file_idx}][{edge_idx}] must be an Add operation"
        );
    }
}
2498
/// With no per-file edge lists, `pending_edges_to_delta` must return no
/// deltas and leave the sequence counter at its starting value.
#[test]
fn test_pending_edges_to_delta_empty() {
    let no_edges = Vec::<Vec<PendingEdge>>::new();

    let (per_file_deltas, next_seq) = pending_edges_to_delta(&no_edges, 0);

    assert!(per_file_deltas.is_empty());
    assert_eq!(next_seq, 0);
}
2506
2507 // ==================================================================
2508 // Task 4 Step 4 Phase 2: rebuild-plane coverage for migrated helpers.
2509 //
2510 // Each test below proves that the migrated helper runs against a
2511 // `RebuildGraph` (not just a `CodeGraph`) and that the mutation
2512 // lands on the rebuild-local state. Together with the CodeGraph
2513 // tests that still exercise the same helpers on the full-build
2514 // path, they form the "runs on both implementors" coverage
2515 // contract for `GraphMutationTarget` consumers.
2516 // ==================================================================
2517
/// Rebuild-plane coverage for [`phase4c_prime_unify_cross_file_nodes`].
///
/// Two `NodeKind::Function` nodes that live in different files share one
/// qualified-name `StringId`. After running the helper against a
/// [`RebuildGraph`] the test checks the reported stats, that the
/// wider-span node keeps its `qualified_name`, that the narrower-span
/// node has its `qualified_name` cleared while its slot stays
/// resolvable, and that the pending edge aimed at the loser now targets
/// the winner.
#[test]
#[cfg(feature = "rebuild-internals")]
fn phase4c_prime_unify_cross_file_nodes_runs_against_rebuild_graph() {
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;
    use crate::graph::unified::node::NodeKind;

    let mut rebuild = CodeGraph::new().clone_for_rebuild();

    // The shared qualified name is interned on the rebuild-local interner.
    let shared_name = rebuild.strings_mut().intern("my_mod::my_func").unwrap();

    // File ids hosting the two duplicate Function nodes.
    let (file_a, file_b) = (FileId::new(7), FileId::new(8));

    // Expected winner: real start line and the wider span (10..30).
    let winner_id = {
        let mut entry = NodeEntry::new(NodeKind::Function, shared_name, file_a);
        entry.qualified_name = Some(shared_name);
        entry.start_line = 10;
        entry.end_line = 30;
        rebuild.nodes_mut().alloc(entry).unwrap()
    };

    // Expected loser: narrower span (5..6), allocated second.
    let loser_id = {
        let mut entry = NodeEntry::new(NodeKind::Function, shared_name, file_b);
        entry.qualified_name = Some(shared_name);
        entry.start_line = 5;
        entry.end_line = 6;
        rebuild.nodes_mut().alloc(entry).unwrap()
    };

    // One pending call edge that points at the loser; unification is
    // expected to redirect its target to the winner.
    let edge_into_loser = PendingEdge {
        source: winner_id,
        target: loser_id,
        kind: EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
            resolved_via: ResolvedVia::Direct,
        },
        file: file_b,
        spans: vec![],
    };
    let mut all_edges = vec![vec![edge_into_loser]];

    let (stats, _remap) = phase4c_prime_unify_cross_file_nodes(&mut rebuild, &mut all_edges);

    // Stats: one pair examined, one node merged, one edge rewritten.
    assert_eq!(stats.nodes_merged, 1, "exactly one loser was tombstoned");
    assert_eq!(stats.candidate_pairs_examined, 1);
    assert_eq!(stats.edges_rewritten, 1);

    // Read both slots back through the trait accessor on the rebuild graph.
    let nodes_after = GraphMutationTarget::nodes(&rebuild);

    let winner_after = nodes_after
        .get(winner_id)
        .expect("winner must remain live");
    assert_eq!(
        winner_after.qualified_name,
        Some(shared_name),
        "winner keeps its qualified_name"
    );

    // The loser slot still resolves, but no longer carries a qualified name.
    let loser_after = nodes_after
        .get(loser_id)
        .expect("loser slot remains live (inert) per §F.1 bijection");
    assert_eq!(
        loser_after.qualified_name, None,
        "loser qualified_name cleared by merge_node_into"
    );

    // The pending edge now points at the winner.
    assert_eq!(
        all_edges[0][0].target, winner_id,
        "PendingEdge.target rewritten from loser → winner"
    );
}
2610
/// Pins the path-based step of the Phase 4c-prime winner selection.
///
/// Documented ordering: primary = `start_line > 0`, tie-break 1 = wider
/// span, tie-break 2 = lexicographically smaller **file path**, final
/// fallback = smaller `NodeId::index()`.
///
/// Both candidates here have identical real spans, so the decision has
/// to fall through to the file path. The node in the path that sorts
/// later is allocated first (smaller arena index), so a NodeId-only
/// tie-break would pick the wrong winner and fail this test.
#[test]
#[cfg(feature = "rebuild-internals")]
fn phase4c_prime_tie_break_prefers_lex_smaller_path_over_node_id() {
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::node::NodeKind;
    use std::path::Path;

    let mut graph = CodeGraph::new();
    let qname = graph.strings_mut().intern("shared_qname").unwrap();

    // Registration order is deliberately the reverse of lexical order.
    let high_path_file = graph
        .files_mut()
        .register(Path::new("zzz_late.rs"))
        .unwrap();
    let low_path_file = graph
        .files_mut()
        .register(Path::new("aaa_early.rs"))
        .unwrap();

    // Both duplicates get the same span (10..20), so neither the primary
    // criterion nor the span-width tie-break can separate them.
    let duplicate_in = |file: FileId| {
        let mut entry = NodeEntry::new(NodeKind::Function, qname, file);
        entry.qualified_name = Some(qname);
        entry.start_line = 10;
        entry.end_line = 20;
        entry
    };

    // `zzz_late.rs` is allocated first, `aaa_early.rs` second.
    let high_node = graph
        .nodes_mut()
        .alloc(duplicate_in(high_path_file))
        .unwrap();
    let low_node = graph
        .nodes_mut()
        .alloc(duplicate_in(low_path_file))
        .unwrap();

    graph.rebuild_indices();

    let mut all_edges: Vec<Vec<PendingEdge>> = Vec::new();
    let (stats, _remap) = phase4c_prime_unify_cross_file_nodes(&mut graph, &mut all_edges);

    assert_eq!(
        stats.nodes_merged, 1,
        "one of the duplicate nodes must be merged into the other"
    );

    // Winner: the node hosted in the lexically smaller path.
    let low_after = graph
        .nodes()
        .get(low_node)
        .expect("winner slot remains live");
    assert_eq!(
        low_after.qualified_name,
        Some(qname),
        "path-earlier node keeps qualified_name as the unification winner"
    );

    // Loser: the earlier-allocated node in the lexically larger path.
    let high_after = graph
        .nodes()
        .get(high_node)
        .expect("loser slot remains inert (Gate 0d bijection contract)");
    assert_eq!(
        high_after.qualified_name, None,
        "path-later node loses even when its NodeId::index() is smaller"
    );
}
2695
/// Pins the last-resort rule of the Phase 4c-prime winner selection.
///
/// Two duplicate nodes sit in the same file with identical spans, so
/// span width and file path cannot separate them. The node with the
/// **smaller** `NodeId` index must win; this guards against a refactor
/// silently flipping the direction of that fallback.
#[test]
#[cfg(feature = "rebuild-internals")]
fn phase4c_prime_tie_break_falls_back_to_smaller_node_id_on_identical_path() {
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::node::NodeKind;
    use std::path::Path;

    let mut graph = CodeGraph::new();
    let qname = graph.strings_mut().intern("shared_qname").unwrap();
    let file = graph.files_mut().register(Path::new("shared.rs")).unwrap();

    // Same kind, name, file and span (1..5) for both nodes; allocation
    // order is the only difference between them.
    let duplicate_entry = || {
        let mut entry = NodeEntry::new(NodeKind::Function, qname, file);
        entry.qualified_name = Some(qname);
        entry.start_line = 1;
        entry.end_line = 5;
        entry
    };
    let first_node = graph.nodes_mut().alloc(duplicate_entry()).unwrap();
    let second_node = graph.nodes_mut().alloc(duplicate_entry()).unwrap();

    assert!(
        first_node.index() < second_node.index(),
        "precondition: first_node's arena slot precedes second_node's"
    );

    graph.rebuild_indices();

    let mut all_edges: Vec<Vec<PendingEdge>> = Vec::new();
    let (stats, _remap) = phase4c_prime_unify_cross_file_nodes(&mut graph, &mut all_edges);

    assert_eq!(stats.nodes_merged, 1);

    // The earlier-allocated node keeps its qualified name.
    let winner_after = graph.nodes().get(first_node).expect("winner live");
    assert_eq!(
        winner_after.qualified_name,
        Some(qname),
        "smaller-index node wins the same-path / same-span tie-break"
    );

    // The later-allocated node is the one merged away.
    let loser_after = graph.nodes().get(second_node).expect("loser inert");
    assert_eq!(
        loser_after.qualified_name, None,
        "larger-index node loses the same-path / same-span tie-break"
    );
}
2755
/// Runs the free [`rebuild_indices`] function on a `CodeGraph` and on a
/// `RebuildGraph` seeded with the same single Function node, then checks
/// that both end up with the same `Function` kind bucket and that the
/// rebuild side also exposes the node through its name bucket.
#[test]
#[cfg(feature = "rebuild-internals")]
fn rebuild_indices_runs_against_rebuild_graph() {
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;
    use crate::graph::unified::node::NodeKind;

    // --- Full-build side: CodeGraph baseline -------------------------
    let mut code_graph = CodeGraph::new();
    let code_name = code_graph.strings_mut().intern("alpha").unwrap();
    let code_node_id = {
        let mut entry = NodeEntry::new(NodeKind::Function, code_name, FileId::new(1));
        entry.qualified_name = Some(code_name);
        code_graph.nodes_mut().alloc(entry).unwrap()
    };
    rebuild_indices(&mut code_graph);
    let code_buckets_function: Vec<NodeId> =
        code_graph.indices().by_kind(NodeKind::Function).to_vec();

    // --- Rebuild side: same data on a fresh RebuildGraph -------------
    let mut rebuild = CodeGraph::new().clone_for_rebuild();
    let rebuild_name = rebuild.strings_mut().intern("alpha").unwrap();
    let rebuild_node_id = {
        let mut entry = NodeEntry::new(NodeKind::Function, rebuild_name, FileId::new(1));
        entry.qualified_name = Some(rebuild_name);
        rebuild.nodes_mut().alloc(entry).unwrap()
    };
    rebuild_indices(&mut rebuild);

    // Each id is the first allocation on its own arena, so the two ids
    // are expected to compare equal.
    assert_eq!(code_node_id, rebuild_node_id);

    // Read the rebuild-side indices through the trait accessor so the
    // `RebuildGraph` implementation is the one being exercised.
    let rebuild_buckets_function: Vec<NodeId> = GraphMutationTarget::indices(&rebuild)
        .by_kind(NodeKind::Function)
        .to_vec();
    assert_eq!(
        code_buckets_function, rebuild_buckets_function,
        "rebuild_indices must produce equivalent Function buckets on both paths"
    );

    // The name bucket on the rebuild side holds exactly the seeded node.
    let rebuild_by_name: Vec<NodeId> = GraphMutationTarget::indices(&rebuild)
        .by_name(rebuild_name)
        .to_vec();
    assert_eq!(rebuild_by_name, vec![rebuild_node_id]);
}
2811
/// Rebuild-plane coverage for [`phase4d_bulk_insert_edges`].
///
/// Two nodes are seeded on a `RebuildGraph`, two pending call edges
/// between them are bulk-inserted, and the test checks that the
/// rebuild-local forward store's sequence counter advanced by exactly
/// two, that its delta holds the new Add edges, and that a follow-up
/// call with empty input leaves the counter where it was.
#[test]
#[cfg(feature = "rebuild-internals")]
fn phase4d_bulk_insert_edges_runs_against_rebuild_graph() {
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;
    use crate::graph::unified::node::NodeKind;

    let mut rebuild = CodeGraph::new().clone_for_rebuild();

    let name_sid = rebuild.strings_mut().intern("edge_target").unwrap();
    let file = FileId::new(3);

    let source_entry = NodeEntry::new(NodeKind::Function, name_sid, file);
    let n_source = rebuild.nodes_mut().alloc(source_entry).unwrap();
    let target_entry = NodeEntry::new(NodeKind::Variable, name_sid, file);
    let n_target = rebuild.nodes_mut().alloc(target_entry).unwrap();

    // Sequence counter of the forward store before anything is inserted.
    let pre_counter = GraphMutationTarget::edges(&rebuild).forward().seq_counter();

    // Two call edges over the same endpoints, differing only in
    // `argument_count`.
    let call_edge = |argument_count| PendingEdge {
        source: n_source,
        target: n_target,
        kind: EdgeKind::Calls {
            argument_count,
            is_async: false,
            resolved_via: ResolvedVia::Direct,
        },
        file,
        spans: vec![],
    };
    let per_file_edges = vec![vec![call_edge(0), call_edge(1)]];

    let final_seq = phase4d_bulk_insert_edges(&mut rebuild, &per_file_edges);

    // The returned counter moved forward by one per inserted edge.
    let expected_seq = pre_counter + 2;
    assert_eq!(
        final_seq, expected_seq,
        "phase4d_bulk_insert_edges must advance seq by edge count"
    );

    // Inspect the forward store in its own scope so the handle is
    // released before the graph is borrowed mutably again below.
    {
        let forward = GraphMutationTarget::edges(&rebuild).forward();
        assert_eq!(forward.seq_counter(), expected_seq);

        let add_count = forward.delta().iter().filter(|e| e.is_add()).count();
        assert!(
            add_count >= 2,
            "expected at least two Add edges in the rebuild-local forward delta"
        );
    }

    // A second call with no per-file edge lists must not move the counter.
    let empty_final = phase4d_bulk_insert_edges(&mut rebuild, &[]);
    assert_eq!(empty_final, expected_seq, "empty input is a no-op");
}
2892
/// `C_EDGE_MIGRATE` regression: when a plugin migrates a `TypeOf{Field}`
/// edge's source from a struct node to the per-field `Property` node,
/// the delta conversion used by Phase 4d must not collapse the new shape
/// onto a sibling edge.
///
/// The fixture holds two pending edges with the same target and the same
/// kind but different sources (Property-sourced and struct-sourced). The
/// test pins that `pending_edges_to_delta`:
///
/// * keeps both edges, in input order, with consecutive sequence numbers;
/// * preserves each edge's source, target and `TypeOf{Field}` kind;
/// * is deterministic: a second conversion of the same input with the
///   same `seq_start` yields the same sources, targets, kind variants
///   and sequence numbers.
#[test]
fn phase4d_preserves_property_sourced_typeof_field_edges() {
    use crate::graph::unified::edge::kind::TypeOfContext;

    // Synthetic NodeIds standing in for `main.SelectorSource` (struct),
    // `main.SelectorSource.NeedTags` (Property), and `bool` (target type).
    let struct_id = NodeId::new(10, 1);
    let property_id = NodeId::new(11, 1);
    let bool_id = NodeId::new(12, 1);

    let typeof_field_kind = EdgeKind::TypeOf {
        context: Some(TypeOfContext::Field),
        index: Some(0),
        name: None,
    };

    // Same (target, kind) pair, different sources: the post-migration
    // Property-sourced edge first, then a struct-sourced edge such as a
    // partial rebuild could leave behind.
    let per_file_edges = vec![vec![
        PendingEdge {
            source: property_id,
            target: bool_id,
            kind: typeof_field_kind.clone(),
            file: FileId::new(0),
            spans: vec![],
        },
        PendingEdge {
            source: struct_id,
            target: bool_id,
            // Last use of the kind value: move it instead of cloning.
            kind: typeof_field_kind,
            file: FileId::new(0),
            spans: vec![],
        },
    ]];

    let (deltas, final_seq) = pending_edges_to_delta(&per_file_edges, 500);

    // No dedup: both edges land in the single per-file delta vector.
    assert_eq!(deltas.len(), 1);
    assert_eq!(deltas[0].len(), 2);
    assert_eq!(final_seq, 502);

    // Edge 0: Property-sourced, first sequence number.
    assert_eq!(deltas[0][0].source, property_id);
    assert_eq!(deltas[0][0].target, bool_id);
    assert_eq!(deltas[0][0].seq, 500);
    assert!(matches!(
        deltas[0][0].kind,
        EdgeKind::TypeOf {
            context: Some(TypeOfContext::Field),
            ..
        }
    ));

    // Edge 1: struct-sourced, next sequence number, same kind shape.
    assert_eq!(deltas[0][1].source, struct_id);
    assert_eq!(deltas[0][1].target, bool_id);
    assert_eq!(deltas[0][1].seq, 501);
    assert!(matches!(
        deltas[0][1].kind,
        EdgeKind::TypeOf {
            context: Some(TypeOfContext::Field),
            ..
        }
    ));

    // Determinism re-check: converting the same input again from the
    // same `seq_start` must reproduce the delta sequence.
    let (deltas_again, final_seq_again) = pending_edges_to_delta(&per_file_edges, 500);
    assert_eq!(final_seq_again, final_seq);
    assert_eq!(deltas_again.len(), deltas.len());
    assert_eq!(deltas_again[0].len(), deltas[0].len());
    for (a, b) in deltas[0].iter().zip(deltas_again[0].iter()) {
        assert_eq!(a.source, b.source);
        assert_eq!(a.target, b.target);
        assert_eq!(a.seq, b.seq);
        // Variant-level kind comparison; it does not rely on `EdgeKind`
        // implementing `PartialEq`.
        assert!(
            std::mem::discriminant(&a.kind) == std::mem::discriminant(&b.kind),
            "edge kind variant must be identical across conversions"
        );
    }
}
2986
2987 // ----------------------------------------------------------------------
2988 // T3 Cluster B (02_DESIGN §4.3.e Change 4): Phase 4d-prime propagation
2989 // ----------------------------------------------------------------------
2990
2991 /// Build a per-file `NodeMetadataStore` carrying one Macro entry with
2992 /// a `cfg_condition` so the merge step is non-vacuous.
2993 fn macro_store_with(
2994 node_id: NodeId,
2995 cfg: &str,
2996 ) -> crate::graph::unified::storage::metadata::NodeMetadataStore {
2997 use crate::graph::unified::storage::metadata::{MacroNodeMetadata, NodeMetadataStore};
2998 let mut store = NodeMetadataStore::new();
2999 let m = MacroNodeMetadata {
3000 cfg_condition: Some(cfg.to_string()),
3001 ..Default::default()
3002 };
3003 store.insert(node_id, m);
3004 store
3005 }
3006
/// With an empty remap table, every per-file staged store must be merged
/// into the graph's macro metadata and the helper must report a change.
#[test]
fn phase4d_prime_merges_per_file_metadata_into_graph_macro_metadata() {
    use super::super::unification::NodeRemapTable;
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;

    let nid_a = NodeId::new(101, 1);
    let nid_b = NodeId::new(202, 1);

    // One staged store per file, each with a distinct cfg condition.
    let staged = vec![
        (FileId::new(7), macro_store_with(nid_a, "linux")),
        (FileId::new(8), macro_store_with(nid_b, "darwin")),
    ];

    let mut graph = CodeGraph::new();
    let remap = NodeRemapTable::default();
    let merged = phase4d_prime_propagate_staging_metadata(&mut graph, staged, &remap);

    assert!(
        merged,
        "non-empty staged stores must report metadata_changed=true"
    );

    // Each node's condition must be readable from the graph-level store.
    for (node_id, expected_cfg) in [(nid_a, "linux"), (nid_b, "darwin")] {
        let stored_cfg = GraphMutationTarget::macro_metadata_mut(&mut graph)
            .get_macro(node_id)
            .and_then(|m| m.cfg_condition.clone());
        assert_eq!(stored_cfg, Some(expected_cfg.to_string()));
    }
}
3044
/// Pins the 02_DESIGN §4.3.e Change 3 contract: metadata staged for a
/// node that the unifier remapped onto a winner must not reach the graph.
/// The winner's own per-file store supplies the authoritative
/// `cfg_condition` (01_SPEC §5.3.f).
#[test]
fn phase4d_prime_drops_loser_metadata_before_merge() {
    use super::super::unification::NodeRemapTable;
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;

    let loser = NodeId::new(101, 1);
    let winner = NodeId::new(202, 1);

    // The loser's file stages `linux`; the winner's file stages `darwin`.
    let staged = vec![
        (FileId::new(7), macro_store_with(loser, "linux")),
        (FileId::new(8), macro_store_with(winner, "darwin")),
    ];

    // Record the unifier's decision: `loser → winner`.
    let mut remap = NodeRemapTable::default();
    remap.insert(loser, winner);

    let mut graph = CodeGraph::new();
    let merged = phase4d_prime_propagate_staging_metadata(&mut graph, staged, &remap);
    assert!(
        merged,
        "winner's store still merges so metadata_changed=true"
    );

    // The winner carries the condition from its own file's store.
    let winner_cfg = GraphMutationTarget::macro_metadata_mut(&mut graph)
        .get_macro(winner)
        .and_then(|m| m.cfg_condition.clone());
    assert_eq!(
        winner_cfg,
        Some("darwin".to_string()),
        "winner's authoritative cfg_condition wins; loser's `linux` is dropped"
    );

    // Nothing is stored under the loser's key.
    let loser_absent = GraphMutationTarget::macro_metadata_mut(&mut graph)
        .get_macro(loser)
        .is_none();
    assert!(
        loser_absent,
        "loser key has no metadata in the graph after Phase 4d-prime"
    );
}
3094
/// Metadata staged under staging-local NodeIds `(i, 1)` for `i` in
/// `0..3` must come back from `rekey_staging_metadata_to_arena` keyed by
/// the arena NodeId at position `i` of the per-file id list, with the
/// payload unchanged and none of the staging-local keys left behind.
#[test]
fn rekey_staging_metadata_to_arena_maps_local_to_arena() {
    use crate::graph::unified::storage::metadata::{MacroNodeMetadata, NodeMetadataStore};

    let conditions = ["linux", "darwin", "windows"];

    // Pair each condition with a `u32` staging index directly, so no
    // `usize → u32` cast is needed.
    let mut staging = NodeMetadataStore::new();
    for (local_index, cond) in (0u32..).zip(conditions) {
        let m = MacroNodeMetadata {
            cfg_condition: Some(cond.to_string()),
            ..Default::default()
        };
        staging.insert(NodeId::new(local_index, 1), m);
    }

    // Arena NodeIds at generation 1, in slots 100..=102: offset from the
    // staging-local indices so a missed rekey cannot pass by coincidence.
    let arena_ids = vec![
        NodeId::new(100, 1),
        NodeId::new(101, 1),
        NodeId::new(102, 1),
    ];

    let rekeyed = rekey_staging_metadata_to_arena(staging, &arena_ids);

    // Every payload is reachable under its arena id.
    assert_eq!(rekeyed.len(), 3);
    for (arena_id, cond) in arena_ids.iter().zip(conditions) {
        let m = rekeyed
            .get_macro(*arena_id)
            .expect("arena NodeId carries the remapped entry");
        assert_eq!(m.cfg_condition.as_deref(), Some(cond));
    }

    // None of the original staging-local keys survive the rekey.
    for local_index in 0u32..3 {
        assert!(
            rekeyed.get_macro(NodeId::new(local_index, 1)).is_none(),
            "staging-local key {local_index} must not survive the rekey"
        );
    }
}
3131
/// A staging key whose index has no matching entry in the per-file id
/// list (index 5 against a one-element list) must be dropped by
/// `rekey_staging_metadata_to_arena` instead of causing a panic, while
/// the in-range key is rekeyed as usual.
#[test]
fn rekey_staging_metadata_drops_out_of_range_keys() {
    use crate::graph::unified::storage::metadata::{MacroNodeMetadata, NodeMetadataStore};

    let with_cfg = |cfg: &str| MacroNodeMetadata {
        cfg_condition: Some(cfg.to_string()),
        ..Default::default()
    };

    let mut staging = NodeMetadataStore::new();
    // Index 0 has a matching arena id below.
    staging.insert(NodeId::new(0, 1), with_cfg("good"));
    // Index 5 lies past the end of the arena id list.
    staging.insert(NodeId::new(5, 1), with_cfg("bad"));

    let arena_id = NodeId::new(100, 1);
    let arena_ids = vec![arena_id];
    let rekeyed = rekey_staging_metadata_to_arena(staging, &arena_ids);

    assert_eq!(rekeyed.len(), 1, "stale out-of-range key dropped");

    let surviving_cfg = rekeyed
        .get_macro(arena_id)
        .and_then(|m| m.cfg_condition.clone());
    assert_eq!(surviving_cfg, Some("good".to_string()));
}
3162
/// With nothing staged, `phase4d_prime_propagate_staging_metadata` must
/// report that no metadata changed.
#[test]
fn phase4d_prime_empty_staged_metadata_returns_false() {
    use super::super::unification::NodeRemapTable;
    use crate::graph::unified::concurrent::CodeGraph;

    let remap = NodeRemapTable::default();
    let mut graph = CodeGraph::new();

    let metadata_changed =
        phase4d_prime_propagate_staging_metadata(&mut graph, Vec::new(), &remap);

    assert!(!metadata_changed, "no staged stores → metadata_changed=false");
}
3173
/// The only staged store consists entirely of a remapped loser. Once
/// that entry is dropped the store is empty, so the helper must report
/// no change and the graph's macro metadata must stay empty.
#[test]
fn phase4d_prime_empty_store_after_loser_drop_returns_false() {
    use super::super::unification::NodeRemapTable;
    use crate::graph::unified::concurrent::CodeGraph;
    use crate::graph::unified::mutation_target::GraphMutationTarget;

    let loser = NodeId::new(101, 1);
    let winner = NodeId::new(202, 1);

    // The unifier's decision: `loser → winner`.
    let mut remap = NodeRemapTable::default();
    remap.insert(loser, winner);

    // A single staged store, holding metadata for the loser only.
    let staged = vec![(FileId::new(7), macro_store_with(loser, "linux"))];

    let mut graph = CodeGraph::new();
    let merged = phase4d_prime_propagate_staging_metadata(&mut graph, staged, &remap);

    assert!(
        !merged,
        "store collapsed to empty by loser-drop → no merge → metadata_changed=false"
    );

    let graph_store_is_empty = GraphMutationTarget::macro_metadata_mut(&mut graph).is_empty();
    assert!(graph_store_is_empty, "graph metadata store stays empty");
}
3204}