Skip to main content

sqry_core/graph/unified/build/
parallel_commit.rs

1//! Parallel commit pipeline for pre-allocated ID ranges.
2//!
3//! Replaces the serial commit loop with a four-phase pipeline:
4//! Phase 2: Count + range assignment via prefix sums
5//! Phase 3: Parallel commit into disjoint pre-allocated ranges
6//! Phase 4: String dedup, remap, index build, edge bulk insert
7//!
8//! # Phase 3 Architecture
9//!
10//! Phase 3 uses `split_at_mut` to carve disjoint sub-slices from pre-allocated
11//! arena and interner ranges, then uses `rayon` to commit each file's staging
12//! graph in parallel without locks:
13//!
14//! ```text
15//! NodeArena slots:   [   file0   |   file1   |   file2   ]
16//! StringInterner:    [   file0   |   file1   |   file2   ]
17//!                         ↑            ↑            ↑
18//!                    split_at_mut  split_at_mut  remainder
19//! ```
20//!
21//! Each file's `commit_single_file` receives its own disjoint slices and
22//! operates independently without contention.
23
24use std::collections::HashMap;
25use std::ops::Range;
26use std::sync::Arc;
27
28use rayon::prelude::*;
29
30use crate::graph::unified::edge::delta::{DeltaEdge, DeltaOp};
31use crate::graph::unified::edge::kind::{EdgeKind, MqProtocol};
32use crate::graph::unified::file::FileId;
33use crate::graph::unified::node::NodeId;
34use crate::graph::unified::storage::NodeArena;
35use crate::graph::unified::storage::arena::{NodeEntry, Slot};
36use crate::graph::unified::storage::interner::StringInterner;
37use crate::graph::unified::string::StringId;
38
39use super::pass3_intra::PendingEdge;
40use super::staging::{StagingGraph, StagingOp};
41
/// Cursor state threaded from one chunk to the next during ID assignment.
///
/// A chunk's node and string ranges begin at these offsets, so successive
/// chunks receive contiguous, non-overlapping ID spaces.
#[derive(Clone, Debug, Default)]
pub struct GlobalOffsets {
    /// Index of the first node slot that has not been handed out yet.
    pub node_offset: u32,
    /// Index of the first string slot that has not been handed out yet.
    pub string_offset: u32,
}
53
54/// Per-file commit plan with pre-assigned ID ranges.
55#[derive(Debug, Clone)]
56pub struct FilePlan {
57    /// Index into the chunk's `ParsedFile` vec.
58    pub parsed_index: usize,
59    /// Pre-assigned `FileId` from batch registration.
60    pub file_id: FileId,
61    /// Node slot range [start..end) in `NodeArena`.
62    pub node_range: Range<u32>,
63    /// String slot range [start..end) in `StringInterner`.
64    pub string_range: Range<u32>,
65}
66
67/// Plan for parallel commit of a single chunk.
68#[derive(Debug, Clone)]
69pub struct ChunkCommitPlan {
70    /// Per-file plans in deterministic file order.
71    pub file_plans: Vec<FilePlan>,
72    /// Total nodes across all files in this chunk.
73    pub total_nodes: u32,
74    /// Total strings across all files in this chunk.
75    pub total_strings: u32,
76    /// Total edges across all files in this chunk.
77    pub total_edges: u64,
78}
79
80/// Compute commit plan from parsed files using prefix-sum range assignment.
81///
82/// Each file gets contiguous, non-overlapping ranges for nodes and strings.
83/// Ranges start from the given global offsets, which carry forward across
84/// chunks.
85///
86/// # Arguments
87///
88/// * `node_counts` - Per-file node counts (from `StagingGraph::node_count_u32()`)
89/// * `string_counts` - Per-file string counts
90/// * `edge_counts` - Per-file edge counts (used for `total_edges` only)
91/// * `file_ids` - Pre-assigned `FileId`s from batch registration
92/// * `node_offset` - Running global node offset across chunks
93/// * `string_offset` - Running global string offset across chunks
94///
95/// # Panics
96///
97/// Panics in debug builds if the per-chunk accounting arrays do not have
98/// identical lengths.
99#[must_use]
100pub fn compute_commit_plan(
101    node_counts: &[u32],
102    string_counts: &[u32],
103    edge_counts: &[u32],
104    file_ids: &[FileId],
105    node_offset: u32,
106    string_offset: u32,
107) -> ChunkCommitPlan {
108    debug_assert_eq!(node_counts.len(), string_counts.len());
109    debug_assert_eq!(node_counts.len(), edge_counts.len());
110    debug_assert_eq!(node_counts.len(), file_ids.len());
111
112    let mut plans = Vec::with_capacity(node_counts.len());
113    let mut node_cursor = node_offset;
114    let mut string_cursor = string_offset;
115    let mut total_edges: u64 = 0;
116
117    for i in 0..node_counts.len() {
118        let nc = node_counts[i];
119        let sc = string_counts[i];
120
121        let node_end = node_cursor
122            .checked_add(nc)
123            .expect("node ID space overflow in commit plan");
124        let string_end = string_cursor
125            .checked_add(sc)
126            .expect("string ID space overflow in commit plan");
127
128        plans.push(FilePlan {
129            parsed_index: i,
130            file_id: file_ids[i],
131            node_range: node_cursor..node_end,
132            string_range: string_cursor..string_end,
133        });
134
135        node_cursor = node_end;
136        string_cursor = string_end;
137        total_edges += u64::from(edge_counts[i]);
138    }
139
140    ChunkCommitPlan {
141        file_plans: plans,
142        total_nodes: node_cursor - node_offset,
143        total_strings: string_cursor - string_offset,
144        total_edges,
145    }
146}
147
148/// Execute Phase 2: count + range assignment for a parsed chunk.
149///
150/// Extracts per-file counts from staging graphs and delegates to
151/// [`compute_commit_plan`] for prefix-sum range assignment.
152#[must_use]
153pub fn phase2_assign_ranges(
154    staging_graphs: &[&StagingGraph],
155    file_ids: &[FileId],
156    offsets: &GlobalOffsets,
157) -> ChunkCommitPlan {
158    let node_counts: Vec<u32> = staging_graphs
159        .iter()
160        .map(|sg| sg.node_count_u32())
161        .collect();
162    let string_counts: Vec<u32> = staging_graphs
163        .iter()
164        .map(|sg| sg.string_count_u32())
165        .collect();
166    let edge_counts: Vec<u32> = staging_graphs
167        .iter()
168        .map(|sg| sg.edge_count_u32())
169        .collect();
170
171    compute_commit_plan(
172        &node_counts,
173        &string_counts,
174        &edge_counts,
175        file_ids,
176        offsets.node_offset,
177        offsets.string_offset,
178    )
179}
180
181/// Phase 3 result: per-file edges and total written counts for validation.
182pub struct Phase3Result {
183    /// Per-file edge collections for Phase 4 bulk insert.
184    pub per_file_edges: Vec<Vec<PendingEdge>>,
185    /// Total nodes actually written (for validation against planned totals).
186    pub total_nodes_written: usize,
187    /// Total strings actually written (for validation against planned totals).
188    pub total_strings_written: usize,
189    /// Total edges collected across all files.
190    pub total_edges_collected: usize,
191}
192
193/// Execute Phase 3: parallel commit into disjoint pre-allocated ranges.
194///
195/// Pre-splits arena and interner slices into per-file disjoint sub-slices
196/// using `split_at_mut`, then uses `rayon` `par_iter` for lock-free parallel
197/// writes. Each file's staging graph is committed independently.
198///
199/// Returns [`Phase3Result`] with per-file edges and written counts so the
200/// caller can validate against plan totals and truncate allocations on
201/// mismatch.
202///
203/// # Panics
204///
205/// Panics if `plan.total_nodes` or `plan.total_strings` exceeds the
206/// pre-allocated range in the arena or interner.
207#[must_use]
208pub fn phase3_parallel_commit(
209    plan: &ChunkCommitPlan,
210    staging_graphs: &[&StagingGraph],
211    arena: &mut NodeArena,
212    interner: &mut StringInterner,
213) -> Phase3Result {
214    if plan.file_plans.is_empty() {
215        return Phase3Result {
216            per_file_edges: Vec::new(),
217            total_nodes_written: 0,
218            total_strings_written: 0,
219            total_edges_collected: 0,
220        };
221    }
222
223    // Determine the start of the pre-allocated ranges.
224    let node_start = plan.file_plans[0].node_range.start;
225    let string_start = plan.file_plans[0].string_range.start;
226
227    // Get mutable slices covering the entire pre-allocated region.
228    let node_slice = arena.bulk_slice_mut(node_start, plan.total_nodes);
229    let (str_slice, rc_slice) = interner.bulk_slices_mut(string_start, plan.total_strings);
230
231    // Pre-split into per-file disjoint sub-slices using split_at_mut.
232    let mut node_remaining = &mut *node_slice;
233    let mut str_remaining = &mut *str_slice;
234    let mut rc_remaining = &mut *rc_slice;
235
236    #[allow(clippy::type_complexity)]
237    let mut file_work: Vec<(
238        &mut [Slot<NodeEntry>],
239        &mut [Option<Arc<str>>],
240        &mut [u32],
241        &FilePlan,
242        usize,
243    )> = Vec::with_capacity(plan.file_plans.len());
244
245    for (i, file_plan) in plan.file_plans.iter().enumerate() {
246        let nc = (file_plan.node_range.end - file_plan.node_range.start) as usize;
247        let sc = (file_plan.string_range.end - file_plan.string_range.start) as usize;
248
249        let (n, nr) = node_remaining.split_at_mut(nc);
250        let (s, sr) = str_remaining.split_at_mut(sc);
251        let (r, rr) = rc_remaining.split_at_mut(sc);
252
253        file_work.push((n, s, r, file_plan, i));
254        node_remaining = nr;
255        str_remaining = sr;
256        rc_remaining = rr;
257    }
258
259    // Parallel commit — each closure owns disjoint slices, no contention.
260    let results: Vec<FileCommitResult> = file_work
261        .into_par_iter()
262        .map(|(node_slots, str_slots, rc_slots, file_plan, idx)| {
263            commit_single_file(
264                staging_graphs[idx],
265                file_plan,
266                node_slots,
267                str_slots,
268                rc_slots,
269            )
270        })
271        .collect();
272
273    let total_nodes_written: usize = results.iter().map(|r| r.nodes_written).sum();
274    let total_strings_written: usize = results.iter().map(|r| r.strings_written).sum();
275    let total_edges_collected: usize = results.iter().map(|r| r.edges.len()).sum();
276    let per_file_edges = results.into_iter().map(|r| r.edges).collect();
277
278    Phase3Result {
279        per_file_edges,
280        total_nodes_written,
281        total_strings_written,
282        total_edges_collected,
283    }
284}
285
286/// Commit a single file's staging graph into pre-allocated disjoint ranges.
287///
288/// This function operates on slices that belong exclusively to this file,
289/// so it requires no locks or synchronization.
290///
291/// # Steps
292///
293/// 1. **Strings**: Extract `InternString` ops, write `Arc<str>` values into
294///    pre-allocated string slots, build local→global `StringId` remap.
295/// 2. **Nodes**: Extract `AddNode` ops, apply string remap to each `NodeEntry`,
296///    set `file_id`, write into pre-allocated node slots, build expected→actual
297///    `NodeId` remap.
298/// 3. **Edges**: Extract `AddEdge` ops, apply node ID remap to source/target,
299///    assign pre-computed sequence numbers, return as `PendingEdge` vec.
300// Result of committing a single file: edges + actual written counts.
301struct FileCommitResult {
302    edges: Vec<PendingEdge>,
303    nodes_written: usize,
304    strings_written: usize,
305}
306
307fn commit_single_file(
308    staging: &StagingGraph,
309    plan: &FilePlan,
310    node_slots: &mut [Slot<NodeEntry>],
311    str_slots: &mut [Option<Arc<str>>],
312    rc_slots: &mut [u32],
313) -> FileCommitResult {
314    let ops = staging.operations();
315
316    // --- Step 1: Write strings, build local→global remap ---
317    let (string_remap, strings_written) = write_strings(ops, plan, str_slots, rc_slots);
318
319    // --- Step 2: Write nodes, build expected→actual node ID remap ---
320    let (node_remap, nodes_written) = write_nodes(ops, plan, node_slots, &string_remap);
321
322    // --- Step 3: Collect remapped edges with pre-assigned sequence numbers ---
323    let edges = collect_edges(ops, plan, &node_remap, &string_remap);
324
325    FileCommitResult {
326        edges,
327        nodes_written,
328        strings_written,
329    }
330}
331
332/// Write staged strings into pre-allocated interner slots.
333///
334/// Validates that each `InternString` op has a local `StringId` and that
335/// no duplicate local IDs exist (matching the serial `commit_strings` checks).
336///
337/// Returns `(remap, strings_written)`.
338fn write_strings(
339    ops: &[StagingOp],
340    plan: &FilePlan,
341    str_slots: &mut [Option<Arc<str>>],
342    rc_slots: &mut [u32],
343) -> (HashMap<StringId, StringId>, usize) {
344    let mut remap = HashMap::new();
345    let mut string_cursor = 0usize;
346
347    for op in ops {
348        if let StagingOp::InternString { local_id, value } = op {
349            // Validate: only local IDs are allowed in staging (matching serial commit_strings)
350            assert!(
351                local_id.is_local(),
352                "non-local StringId {:?} in InternString op for file {:?}",
353                local_id,
354                plan.file_id,
355            );
356            // Validate: no duplicate local IDs (matching serial commit_strings)
357            assert!(
358                !remap.contains_key(local_id),
359                "duplicate local StringId {:?} in InternString op for file {:?}",
360                local_id,
361                plan.file_id,
362            );
363
364            if string_cursor >= str_slots.len() {
365                log::warn!(
366                    "string slot overflow in file {:?}: cursor={string_cursor}, slots={}, skipping remaining strings",
367                    plan.file_id,
368                    str_slots.len()
369                );
370                break;
371            }
372
373            // The global StringId for this string is the pre-allocated slot index.
374            #[allow(clippy::cast_possible_truncation)] // cursor is bounded by allocated slot count
375            let global_id = StringId::new(plan.string_range.start + string_cursor as u32);
376
377            // Write the string into the pre-allocated slot.
378            str_slots[string_cursor] = Some(Arc::from(value.as_str()));
379            rc_slots[string_cursor] = 1;
380
381            remap.insert(*local_id, global_id);
382            string_cursor += 1;
383        }
384    }
385
386    (remap, string_cursor)
387}
388
389/// Remap all `StringId` fields in a `NodeEntry` using a local→global table.
390///
391/// Required field (`name`) is always remapped if local.
392/// Optional fields (`signature`, `doc`, `qualified_name`, `visibility`)
393/// are remapped if present and local.
394fn remap_node_entry_string_ids(entry: &mut NodeEntry, remap: &HashMap<StringId, StringId>) {
395    remap_required_local(&mut entry.name, remap);
396    remap_option_local(&mut entry.signature, remap);
397    remap_option_local(&mut entry.doc, remap);
398    remap_option_local(&mut entry.qualified_name, remap);
399    remap_option_local(&mut entry.visibility, remap);
400}
401
402/// Remap all local `StringId` fields in an `EdgeKind`.
403///
404/// Uses the same exhaustive match as `remap_edge_kind_string_ids`, but
405/// only remaps local IDs (those with `LOCAL_TAG_BIT` set).
406#[allow(clippy::match_same_arms)]
407fn remap_edge_kind_local_string_ids(kind: &mut EdgeKind, remap: &HashMap<StringId, StringId>) {
408    match kind {
409        EdgeKind::Imports { alias, .. } => remap_option_local(alias, remap),
410        EdgeKind::Exports { alias, .. } => remap_option_local(alias, remap),
411        EdgeKind::TypeOf { name, .. } => remap_option_local(name, remap),
412        EdgeKind::TraitMethodBinding {
413            trait_name,
414            impl_type,
415            ..
416        } => {
417            remap_required_local(trait_name, remap);
418            remap_required_local(impl_type, remap);
419        }
420        EdgeKind::HttpRequest { url, .. } => remap_option_local(url, remap),
421        EdgeKind::GrpcCall { service, method } => {
422            remap_required_local(service, remap);
423            remap_required_local(method, remap);
424        }
425        EdgeKind::DbQuery { table, .. } => remap_option_local(table, remap),
426        EdgeKind::TableRead { table_name, schema } => {
427            remap_required_local(table_name, remap);
428            remap_option_local(schema, remap);
429        }
430        EdgeKind::TableWrite {
431            table_name, schema, ..
432        } => {
433            remap_required_local(table_name, remap);
434            remap_option_local(schema, remap);
435        }
436        EdgeKind::TriggeredBy {
437            trigger_name,
438            schema,
439        } => {
440            remap_required_local(trigger_name, remap);
441            remap_option_local(schema, remap);
442        }
443        EdgeKind::MessageQueue { protocol, topic } => {
444            if let MqProtocol::Other(s) = protocol {
445                remap_required_local(s, remap);
446            }
447            remap_option_local(topic, remap);
448        }
449        EdgeKind::WebSocket { event } => remap_option_local(event, remap),
450        EdgeKind::GraphQLOperation { operation } => remap_required_local(operation, remap),
451        EdgeKind::ProcessExec { command } => remap_required_local(command, remap),
452        EdgeKind::FileIpc { path_pattern } => remap_option_local(path_pattern, remap),
453        EdgeKind::ProtocolCall { protocol, metadata } => {
454            remap_required_local(protocol, remap);
455            remap_option_local(metadata, remap);
456        }
457        // Variants without StringId fields — exhaustive, no wildcard.
458        EdgeKind::Defines => {}
459        EdgeKind::Contains => {}
460        EdgeKind::Calls { .. } => {}
461        EdgeKind::References => {}
462        EdgeKind::Inherits => {}
463        EdgeKind::Implements => {}
464        EdgeKind::LifetimeConstraint { .. } => {}
465        EdgeKind::MacroExpansion { .. } => {}
466        EdgeKind::FfiCall { .. } => {}
467        EdgeKind::WebAssemblyCall => {}
468    }
469}
470
471/// Remap a required local `StringId` in place.
472///
473/// Panics if a local ID has no mapping, matching the serial
474/// `apply_string_remap` behavior that returned `UnmappedLocalStringId`.
475fn remap_required_local(id: &mut StringId, remap: &HashMap<StringId, StringId>) {
476    if id.is_local() {
477        let global = remap.get(id).unwrap_or_else(|| {
478            panic!("unmapped local StringId {id:?} — missing intern_string op?")
479        });
480        *id = *global;
481    }
482}
483
484/// Remap an optional local `StringId` in place.
485fn remap_option_local(opt: &mut Option<StringId>, remap: &HashMap<StringId, StringId>) {
486    if let Some(id) = opt
487        && id.is_local()
488    {
489        let global = remap.get(id).unwrap_or_else(|| {
490            panic!("unmapped local StringId {id:?} — missing intern_string op?")
491        });
492        *id = *global;
493    }
494}
495
496/// Write staged nodes into pre-allocated arena slots.
497///
498/// Returns `(remap, nodes_written)`.
499fn write_nodes(
500    ops: &[StagingOp],
501    plan: &FilePlan,
502    node_slots: &mut [Slot<NodeEntry>],
503    string_remap: &HashMap<StringId, StringId>,
504) -> (HashMap<NodeId, NodeId>, usize) {
505    let mut node_remap = HashMap::new();
506    let mut node_cursor = 0usize;
507
508    for op in ops {
509        if let StagingOp::AddNode {
510            entry, expected_id, ..
511        } = op
512        {
513            if node_cursor >= node_slots.len() {
514                log::warn!(
515                    "node slot overflow in file {:?}: cursor={node_cursor}, slots={}, skipping remaining nodes",
516                    plan.file_id,
517                    node_slots.len()
518                );
519                break;
520            }
521
522            let mut entry = entry.clone();
523
524            // Apply string remap to all StringId fields in the entry.
525            remap_node_entry_string_ids(&mut entry, string_remap);
526
527            // Set the file ID from the plan.
528            entry.file = plan.file_id;
529
530            // The actual NodeId is the pre-allocated slot index with generation 1.
531            #[allow(clippy::cast_possible_truncation)] // cursor is bounded by allocated slot count
532            let actual_index = plan.node_range.start + node_cursor as u32;
533            let actual_id = NodeId::new(actual_index, 1);
534
535            // Write into the pre-allocated slot.
536            node_slots[node_cursor] = Slot::new_occupied(1, entry);
537
538            if let Some(expected) = expected_id {
539                node_remap.insert(*expected, actual_id);
540            }
541
542            node_cursor += 1;
543        }
544    }
545
546    (node_remap, node_cursor)
547}
548
549/// Collect staged edges with remapped node IDs, string IDs, and pre-assigned
550/// sequence numbers.
551fn collect_edges(
552    ops: &[StagingOp],
553    plan: &FilePlan,
554    node_remap: &HashMap<NodeId, NodeId>,
555    string_remap: &HashMap<StringId, StringId>,
556) -> Vec<PendingEdge> {
557    let mut edges = Vec::new();
558
559    for op in ops {
560        if let StagingOp::AddEdge {
561            source,
562            target,
563            kind,
564            spans,
565            ..
566        } = op
567        {
568            let actual_source = node_remap.get(source).copied().unwrap_or(*source);
569            let actual_target = node_remap.get(target).copied().unwrap_or(*target);
570
571            // Clone and remap any local StringIds in the EdgeKind.
572            let mut remapped_kind = kind.clone();
573            remap_edge_kind_local_string_ids(&mut remapped_kind, string_remap);
574
575            edges.push(PendingEdge {
576                source: actual_source,
577                target: actual_target,
578                kind: remapped_kind,
579                file: plan.file_id,
580                spans: spans.clone(),
581            });
582        }
583    }
584
585    edges
586}
587
588/// Remap a required `StringId` using the dedup remap table.
589///
590/// If the ID is in the remap table, it is replaced with the canonical ID.
591/// Otherwise, it is left unchanged (identity mapping).
592#[allow(clippy::implicit_hasher)]
593pub fn remap_string_id(id: &mut StringId, remap: &HashMap<StringId, StringId>) {
594    if let Some(&canonical) = remap.get(id) {
595        *id = canonical;
596    }
597}
598
599/// Remap an optional `StringId` using the dedup remap table.
600#[allow(clippy::implicit_hasher)]
601pub fn remap_option_string_id(id: &mut Option<StringId>, remap: &HashMap<StringId, StringId>) {
602    if let Some(inner) = id {
603        remap_string_id(inner, remap);
604    }
605}
606
607/// Exhaustive remap of all `StringId` fields in an `EdgeKind`.
608///
609/// No wildcard arm — the compiler ensures completeness when new variants
610/// are added to `EdgeKind`.
611#[allow(clippy::match_same_arms, clippy::implicit_hasher)] // Arms are separated by category for documentation clarity
612pub fn remap_edge_kind_string_ids(kind: &mut EdgeKind, remap: &HashMap<StringId, StringId>) {
613    match kind {
614        // === Variants WITH StringId fields ===
615        EdgeKind::Imports { alias, .. } => remap_option_string_id(alias, remap),
616        EdgeKind::Exports { alias, .. } => remap_option_string_id(alias, remap),
617        EdgeKind::TypeOf { name, .. } => remap_option_string_id(name, remap),
618        EdgeKind::TraitMethodBinding {
619            trait_name,
620            impl_type,
621            ..
622        } => {
623            remap_string_id(trait_name, remap);
624            remap_string_id(impl_type, remap);
625        }
626        EdgeKind::HttpRequest { url, .. } => remap_option_string_id(url, remap),
627        EdgeKind::GrpcCall { service, method } => {
628            remap_string_id(service, remap);
629            remap_string_id(method, remap);
630        }
631        EdgeKind::DbQuery { table, .. } => remap_option_string_id(table, remap),
632        EdgeKind::TableRead { table_name, schema } => {
633            remap_string_id(table_name, remap);
634            remap_option_string_id(schema, remap);
635        }
636        EdgeKind::TableWrite {
637            table_name, schema, ..
638        } => {
639            remap_string_id(table_name, remap);
640            remap_option_string_id(schema, remap);
641        }
642        EdgeKind::TriggeredBy {
643            trigger_name,
644            schema,
645        } => {
646            remap_string_id(trigger_name, remap);
647            remap_option_string_id(schema, remap);
648        }
649        EdgeKind::MessageQueue { protocol, topic } => {
650            if let MqProtocol::Other(s) = protocol {
651                remap_string_id(s, remap);
652            }
653            remap_option_string_id(topic, remap);
654        }
655        EdgeKind::WebSocket { event } => remap_option_string_id(event, remap),
656        EdgeKind::GraphQLOperation { operation } => remap_string_id(operation, remap),
657        EdgeKind::ProcessExec { command } => remap_string_id(command, remap),
658        EdgeKind::FileIpc { path_pattern } => remap_option_string_id(path_pattern, remap),
659        EdgeKind::ProtocolCall { protocol, metadata } => {
660            remap_string_id(protocol, remap);
661            remap_option_string_id(metadata, remap);
662        }
663        // === Variants WITHOUT StringId fields — exhaustive, no wildcard ===
664        EdgeKind::Defines => {}
665        EdgeKind::Contains => {}
666        EdgeKind::Calls { .. } => {}
667        EdgeKind::References => {}
668        EdgeKind::Inherits => {}
669        EdgeKind::Implements => {}
670        EdgeKind::LifetimeConstraint { .. } => {}
671        EdgeKind::MacroExpansion { .. } => {}
672        EdgeKind::FfiCall { .. } => {}
673        EdgeKind::WebAssemblyCall => {}
674    }
675}
676
677// === Phase 4: Post-chunk Finalization ===
678
679/// Apply global string dedup remap to all `StringId` fields in a `NodeEntry`.
680///
681/// This is the Phase 4 counterpart to `remap_node_entry_string_ids` (Phase 3).
682/// Phase 3 remaps local→global; Phase 4 remaps duplicate global→canonical global.
683#[allow(clippy::implicit_hasher)]
684pub fn remap_node_entry_global(entry: &mut NodeEntry, remap: &HashMap<StringId, StringId>) {
685    remap_string_id(&mut entry.name, remap);
686    remap_option_string_id(&mut entry.signature, remap);
687    remap_option_string_id(&mut entry.doc, remap);
688    remap_option_string_id(&mut entry.qualified_name, remap);
689    remap_option_string_id(&mut entry.visibility, remap);
690}
691
692/// Apply global string dedup remap to all nodes in the arena and all pending edges.
693///
694/// This is Phase 4b of the parallel commit pipeline. After `build_dedup_table()`
695/// produces a remap table, this function applies it to every `StringId` in:
696/// - All `NodeEntry` fields in the arena
697/// - All `EdgeKind` fields in the pending edges
698#[allow(clippy::implicit_hasher)]
699pub fn phase4_apply_global_remap(
700    arena: &mut NodeArena,
701    all_edges: &mut [Vec<PendingEdge>],
702    remap: &HashMap<StringId, StringId>,
703) {
704    if remap.is_empty() {
705        return;
706    }
707
708    // Remap all nodes
709    for (_id, entry) in arena.iter_mut() {
710        remap_node_entry_global(entry, remap);
711    }
712
713    // Remap all edges
714    for file_edges in all_edges.iter_mut() {
715        for edge in file_edges.iter_mut() {
716            remap_edge_kind_string_ids(&mut edge.kind, remap);
717        }
718    }
719}
720
721/// Convert per-file `PendingEdge` collections to per-file `DeltaEdge` collections
722/// with monotonically increasing sequence numbers.
723///
724/// The sequence numbers are assigned file-by-file, edge-by-edge, starting from
725/// `seq_start`. This produces the deterministic ordering required by
726/// `BidirectionalEdgeStore::add_edges_bulk_ordered()`.
727#[must_use]
728pub fn pending_edges_to_delta(
729    per_file_edges: &[Vec<PendingEdge>],
730    seq_start: u64,
731) -> (Vec<Vec<DeltaEdge>>, u64) {
732    let mut seq = seq_start;
733    let mut result = Vec::with_capacity(per_file_edges.len());
734
735    for file_edges in per_file_edges {
736        let mut delta_vec = Vec::with_capacity(file_edges.len());
737        for edge in file_edges {
738            delta_vec.push(DeltaEdge::with_spans(
739                edge.source,
740                edge.target,
741                edge.kind.clone(),
742                seq,
743                DeltaOp::Add,
744                edge.file,
745                edge.spans.clone(),
746            ));
747            seq += 1;
748        }
749        result.push(delta_vec);
750    }
751
752    (result, seq)
753}
754
755#[cfg(test)]
756mod tests {
757    use super::*;
758
759    #[test]
760    fn test_compute_commit_plan_basic() {
761        let file_ids = vec![FileId::new(0), FileId::new(1), FileId::new(2)];
762        let node_counts = vec![3, 0, 5];
763        let string_counts = vec![2, 1, 3];
764        let edge_counts = vec![4, 0, 6];
765
766        let plan = compute_commit_plan(
767            &node_counts,
768            &string_counts,
769            &edge_counts,
770            &file_ids,
771            0,
772            1, // string_offset=1 for sentinel
773        );
774
775        assert_eq!(plan.total_nodes, 8);
776        assert_eq!(plan.total_strings, 6);
777        assert_eq!(plan.total_edges, 10);
778
779        // File 0: nodes [0..3), strings [1..3)
780        assert_eq!(plan.file_plans[0].node_range, 0..3);
781        assert_eq!(plan.file_plans[0].string_range, 1..3);
782
783        // File 1: nodes [3..3), strings [3..4) — empty nodes
784        assert_eq!(plan.file_plans[1].node_range, 3..3);
785        assert_eq!(plan.file_plans[1].string_range, 3..4);
786
787        // File 2: nodes [3..8), strings [4..7)
788        assert_eq!(plan.file_plans[2].node_range, 3..8);
789        assert_eq!(plan.file_plans[2].string_range, 4..7);
790    }
791
792    #[test]
793    fn test_compute_commit_plan_with_offsets() {
794        let file_ids = vec![FileId::new(5)];
795        let plan = compute_commit_plan(&[10], &[5], &[7], &file_ids, 100, 50);
796        assert_eq!(plan.file_plans[0].node_range, 100..110);
797        assert_eq!(plan.file_plans[0].string_range, 50..55);
798        assert_eq!(plan.total_nodes, 10);
799        assert_eq!(plan.total_strings, 5);
800        assert_eq!(plan.total_edges, 7);
801    }
802
803    #[test]
804    fn test_compute_commit_plan_empty() {
805        let plan = compute_commit_plan(&[], &[], &[], &[], 0, 1);
806        assert_eq!(plan.total_nodes, 0);
807        assert_eq!(plan.total_strings, 0);
808        assert_eq!(plan.total_edges, 0);
809        assert!(plan.file_plans.is_empty());
810    }
811
812    #[test]
813    fn test_remap_string_id_basic() {
814        let mut remap = HashMap::new();
815        remap.insert(StringId::new(1), StringId::new(100));
816
817        let mut id = StringId::new(1);
818        remap_string_id(&mut id, &remap);
819        assert_eq!(id, StringId::new(100));
820    }
821
822    #[test]
823    fn test_remap_string_id_not_in_remap() {
824        let remap = HashMap::new();
825        let mut id = StringId::new(42);
826        remap_string_id(&mut id, &remap);
827        assert_eq!(id, StringId::new(42)); // unchanged
828    }
829
830    #[test]
831    fn test_remap_option_string_id() {
832        let mut remap = HashMap::new();
833        remap.insert(StringId::new(5), StringId::new(50));
834
835        let mut some_id = Some(StringId::new(5));
836        remap_option_string_id(&mut some_id, &remap);
837        assert_eq!(some_id, Some(StringId::new(50)));
838
839        let mut none_id: Option<StringId> = None;
840        remap_option_string_id(&mut none_id, &remap);
841        assert_eq!(none_id, None);
842    }
843
844    #[test]
845    fn test_remap_edge_kind_imports() {
846        let mut remap = HashMap::new();
847        remap.insert(StringId::new(1), StringId::new(100));
848
849        let mut kind = EdgeKind::Imports {
850            alias: Some(StringId::new(1)),
851            is_wildcard: false,
852        };
853        remap_edge_kind_string_ids(&mut kind, &remap);
854        assert!(
855            matches!(kind, EdgeKind::Imports { alias: Some(id), .. } if id == StringId::new(100))
856        );
857    }
858
859    #[test]
860    fn test_remap_edge_kind_trait_method_binding() {
861        let mut remap = HashMap::new();
862        remap.insert(StringId::new(1), StringId::new(100));
863        remap.insert(StringId::new(2), StringId::new(200));
864
865        let mut kind = EdgeKind::TraitMethodBinding {
866            trait_name: StringId::new(1),
867            impl_type: StringId::new(2),
868            is_ambiguous: false,
869        };
870        remap_edge_kind_string_ids(&mut kind, &remap);
871        assert!(
872            matches!(kind, EdgeKind::TraitMethodBinding { trait_name, impl_type, .. }
873                if trait_name == StringId::new(100) && impl_type == StringId::new(200))
874        );
875    }
876
877    #[test]
878    fn test_remap_edge_kind_no_op_variants() {
879        let remap = HashMap::new();
880
881        // Defines — no StringId fields
882        let mut kind = EdgeKind::Defines;
883        remap_edge_kind_string_ids(&mut kind, &remap);
884        assert!(matches!(kind, EdgeKind::Defines));
885
886        // Calls — no StringId fields
887        let mut kind = EdgeKind::Calls {
888            argument_count: 3,
889            is_async: true,
890        };
891        remap_edge_kind_string_ids(&mut kind, &remap);
892        assert!(matches!(
893            kind,
894            EdgeKind::Calls {
895                argument_count: 3,
896                is_async: true,
897            }
898        ));
899    }
900
901    fn placeholder_entry() -> NodeEntry {
902        use crate::graph::unified::node::NodeKind;
903        NodeEntry::new(NodeKind::Function, StringId::new(0), FileId::new(0))
904    }
905
906    #[test]
907    fn test_phase2_assign_ranges_basic() {
908        use super::super::staging::StagingGraph;
909
910        // Create 2 staging graphs with known counts
911        let mut sg0 = StagingGraph::new();
912        let mut sg1 = StagingGraph::new();
913
914        // sg0: 2 nodes, 1 string, 1 edge
915        let entry0 = placeholder_entry();
916        let n0 = sg0.add_node(entry0.clone());
917        let n1 = sg0.add_node(entry0.clone());
918        sg0.intern_string(StringId::new_local(0), "hello".into());
919        sg0.add_edge(
920            n0,
921            n1,
922            EdgeKind::Calls {
923                argument_count: 0,
924                is_async: false,
925            },
926            FileId::new(0),
927        );
928
929        // sg1: 1 node, 2 strings, 0 edges
930        sg1.add_node(entry0);
931        sg1.intern_string(StringId::new_local(0), "world".into());
932        sg1.intern_string(StringId::new_local(1), "foo".into());
933
934        let file_ids = vec![FileId::new(10), FileId::new(11)];
935        let offsets = GlobalOffsets {
936            node_offset: 5,
937            string_offset: 3,
938        };
939
940        let plan = phase2_assign_ranges(&[&sg0, &sg1], &file_ids, &offsets);
941
942        // sg0: 2 nodes, 1 string, 1 edge
943        assert_eq!(plan.file_plans[0].node_range, 5..7);
944        assert_eq!(plan.file_plans[0].string_range, 3..4);
945
946        // sg1: 1 node, 2 strings, 0 edges
947        assert_eq!(plan.file_plans[1].node_range, 7..8);
948        assert_eq!(plan.file_plans[1].string_range, 4..6);
949
950        assert_eq!(plan.total_nodes, 3);
951        assert_eq!(plan.total_strings, 3);
952        assert_eq!(plan.total_edges, 1);
953    }
954
955    #[test]
956    fn test_phase3_parallel_commit_basic() {
957        use super::super::staging::StagingGraph;
958        use crate::graph::unified::node::NodeKind;
959        use crate::graph::unified::storage::NodeArena;
960        use crate::graph::unified::storage::interner::StringInterner;
961
962        // Create a staging graph with 2 nodes, 1 string, 1 edge
963        let mut sg = StagingGraph::new();
964        let local_name = StringId::new_local(0);
965        sg.intern_string(local_name, "my_func".into());
966
967        let entry = NodeEntry::new(NodeKind::Function, local_name, FileId::new(0));
968        let n0 = sg.add_node(entry.clone());
969
970        let entry2 = NodeEntry::new(NodeKind::Variable, local_name, FileId::new(0));
971        let n1 = sg.add_node(entry2);
972
973        sg.add_edge(
974            n0,
975            n1,
976            EdgeKind::Calls {
977                argument_count: 0,
978                is_async: false,
979            },
980            FileId::new(0),
981        );
982
983        let file_ids = vec![FileId::new(5)];
984
985        // Pre-allocate with non-zero offsets to verify remap works.
986        let mut arena = NodeArena::new();
987        let mut interner = StringInterner::new();
988
989        // Pre-fill some slots so our file starts at a non-zero offset.
990        arena.alloc_range(10, &placeholder_entry()).unwrap();
991        let string_start = interner.alloc_range(1).unwrap();
992        assert_eq!(string_start, 1); // past sentinel
993
994        let offsets = GlobalOffsets {
995            node_offset: 10, // file's nodes start at index 10
996            string_offset: string_start,
997        };
998        let plan = phase2_assign_ranges(&[&sg], &file_ids, &offsets);
999        assert_eq!(plan.file_plans[0].node_range, 10..12);
1000
1001        // Pre-allocate the actual ranges for Phase 3.
1002        arena
1003            .alloc_range(plan.total_nodes, &placeholder_entry())
1004            .unwrap();
1005        interner.alloc_range(plan.total_strings).unwrap();
1006
1007        // Phase 3
1008        let result = phase3_parallel_commit(&plan, &[&sg], &mut arena, &mut interner);
1009
1010        // Verify written counts
1011        assert_eq!(result.total_nodes_written, 2);
1012        assert_eq!(result.total_strings_written, 1);
1013
1014        // Verify strings were written
1015        let global_name = StringId::new(string_start);
1016        assert_eq!(&*interner.resolve(global_name).unwrap(), "my_func");
1017
1018        // Verify 1 file, 1 edge
1019        assert_eq!(result.per_file_edges.len(), 1);
1020        assert_eq!(result.per_file_edges[0].len(), 1);
1021
1022        // Verify edge was remapped to global IDs (node_offset=10)
1023        let edge = &result.per_file_edges[0][0];
1024        assert_eq!(edge.file, FileId::new(5));
1025        assert_eq!(edge.source, NodeId::new(10, 1)); // first node at slot 10
1026        assert_eq!(edge.target, NodeId::new(11, 1)); // second node at slot 11
1027    }
1028
1029    #[test]
1030    fn test_phase3_parallel_commit_empty() {
1031        use crate::graph::unified::storage::NodeArena;
1032        use crate::graph::unified::storage::interner::StringInterner;
1033
1034        let mut arena = NodeArena::new();
1035        let mut interner = StringInterner::new();
1036
1037        let plan = ChunkCommitPlan {
1038            file_plans: vec![],
1039            total_nodes: 0,
1040            total_strings: 0,
1041            total_edges: 0,
1042        };
1043
1044        let result = phase3_parallel_commit(&plan, &[], &mut arena, &mut interner);
1045        assert!(result.per_file_edges.is_empty());
1046        assert_eq!(result.total_nodes_written, 0);
1047        assert_eq!(result.total_strings_written, 0);
1048    }
1049
1050    #[test]
1051    fn test_commit_single_file_string_remap() {
1052        use super::super::staging::StagingGraph;
1053        use crate::graph::unified::node::NodeKind;
1054
1055        let mut sg = StagingGraph::new();
1056        let local_0 = StringId::new_local(0);
1057        let local_1 = StringId::new_local(1);
1058        sg.intern_string(local_0, "alpha".into());
1059        sg.intern_string(local_1, "beta".into());
1060
1061        let mut entry = NodeEntry::new(NodeKind::Function, local_0, FileId::new(0));
1062        entry.signature = Some(local_1);
1063        sg.add_node(entry);
1064
1065        let plan = FilePlan {
1066            parsed_index: 0,
1067            file_id: FileId::new(42),
1068            node_range: 10..11,
1069            string_range: 20..22,
1070        };
1071
1072        let mut node_slots = vec![Slot::new_occupied(1, placeholder_entry())];
1073        let mut str_slots: Vec<Option<Arc<str>>> = vec![None, None];
1074        let mut rc_slots: Vec<u32> = vec![0, 0];
1075
1076        let result = commit_single_file(&sg, &plan, &mut node_slots, &mut str_slots, &mut rc_slots);
1077
1078        // Strings written
1079        assert_eq!(str_slots[0].as_deref(), Some("alpha"));
1080        assert_eq!(str_slots[1].as_deref(), Some("beta"));
1081        assert_eq!(rc_slots[0], 1);
1082        assert_eq!(rc_slots[1], 1);
1083        assert_eq!(result.strings_written, 2);
1084
1085        // Node entry has remapped StringIds
1086        if let crate::graph::unified::storage::SlotState::Occupied(entry) = node_slots[0].state() {
1087            assert_eq!(entry.name, StringId::new(20)); // global slot 20
1088            assert_eq!(entry.signature, Some(StringId::new(21))); // global slot 21
1089            assert_eq!(entry.file, FileId::new(42));
1090        } else {
1091            panic!("Expected occupied slot");
1092        }
1093        assert_eq!(result.nodes_written, 1);
1094
1095        // No edges
1096        assert!(result.edges.is_empty());
1097    }
1098
1099    #[test]
1100    fn test_remap_edge_kind_message_queue_other() {
1101        let mut remap = HashMap::new();
1102        remap.insert(StringId::new(10), StringId::new(110));
1103        remap.insert(StringId::new(20), StringId::new(220));
1104
1105        let mut kind = EdgeKind::MessageQueue {
1106            protocol: MqProtocol::Other(StringId::new(10)),
1107            topic: Some(StringId::new(20)),
1108        };
1109        remap_edge_kind_string_ids(&mut kind, &remap);
1110        assert!(matches!(
1111            kind,
1112            EdgeKind::MessageQueue {
1113                protocol: MqProtocol::Other(proto),
1114                topic: Some(topic),
1115            } if proto == StringId::new(110) && topic == StringId::new(220)
1116        ));
1117    }
1118
1119    // === Phase 4 tests ===
1120
1121    #[test]
1122    fn test_phase4_apply_global_remap_basic() {
1123        use crate::graph::unified::node::NodeKind;
1124        use crate::graph::unified::storage::NodeArena;
1125
1126        let mut arena = NodeArena::new();
1127
1128        // Allocate two nodes with duplicate string IDs (2 and 3 are dupes of 1)
1129        let entry1 = NodeEntry::new(NodeKind::Function, StringId::new(1), FileId::new(0));
1130        let mut entry2 = NodeEntry::new(NodeKind::Variable, StringId::new(2), FileId::new(0));
1131        entry2.signature = Some(StringId::new(3));
1132
1133        arena.alloc(entry1).unwrap();
1134        arena.alloc(entry2).unwrap();
1135
1136        // Edges with string IDs that need remapping
1137        let mut all_edges = vec![vec![PendingEdge {
1138            source: NodeId::new(0, 1),
1139            target: NodeId::new(1, 1),
1140            kind: EdgeKind::Imports {
1141                alias: Some(StringId::new(3)),
1142                is_wildcard: false,
1143            },
1144            file: FileId::new(0),
1145            spans: vec![],
1146        }]];
1147
1148        // Dedup remap: 2→1, 3→1
1149        let mut remap = HashMap::new();
1150        remap.insert(StringId::new(2), StringId::new(1));
1151        remap.insert(StringId::new(3), StringId::new(1));
1152
1153        phase4_apply_global_remap(&mut arena, &mut all_edges, &remap);
1154
1155        // Check that node 1's name was remapped from 2→1
1156        let (_, entry) = arena.iter().nth(1).unwrap();
1157        assert_eq!(entry.name, StringId::new(1));
1158        assert_eq!(entry.signature, Some(StringId::new(1)));
1159
1160        // Check that edge's alias was remapped from 3→1
1161        if let EdgeKind::Imports { alias, .. } = &all_edges[0][0].kind {
1162            assert_eq!(*alias, Some(StringId::new(1)));
1163        } else {
1164            panic!("Expected Imports edge");
1165        }
1166    }
1167
1168    #[test]
1169    fn test_phase4_apply_global_remap_empty() {
1170        use crate::graph::unified::storage::NodeArena;
1171
1172        let mut arena = NodeArena::new();
1173        let mut edges: Vec<Vec<PendingEdge>> = vec![];
1174        let remap = HashMap::new();
1175
1176        // Should be a no-op
1177        phase4_apply_global_remap(&mut arena, &mut edges, &remap);
1178    }
1179
1180    #[test]
1181    fn test_pending_edges_to_delta_basic() {
1182        let edges = vec![
1183            vec![
1184                PendingEdge {
1185                    source: NodeId::new(0, 1),
1186                    target: NodeId::new(1, 1),
1187                    kind: EdgeKind::Calls {
1188                        argument_count: 0,
1189                        is_async: false,
1190                    },
1191                    file: FileId::new(0),
1192                    spans: vec![],
1193                },
1194                PendingEdge {
1195                    source: NodeId::new(1, 1),
1196                    target: NodeId::new(2, 1),
1197                    kind: EdgeKind::References,
1198                    file: FileId::new(0),
1199                    spans: vec![],
1200                },
1201            ],
1202            vec![PendingEdge {
1203                source: NodeId::new(3, 1),
1204                target: NodeId::new(4, 1),
1205                kind: EdgeKind::Defines,
1206                file: FileId::new(1),
1207                spans: vec![],
1208            }],
1209        ];
1210
1211        let (deltas, final_seq) = pending_edges_to_delta(&edges, 100);
1212
1213        assert_eq!(deltas.len(), 2);
1214        assert_eq!(deltas[0].len(), 2);
1215        assert_eq!(deltas[1].len(), 1);
1216        assert_eq!(final_seq, 103);
1217
1218        // Check sequence numbers are monotonic
1219        assert_eq!(deltas[0][0].seq, 100);
1220        assert_eq!(deltas[0][1].seq, 101);
1221        assert_eq!(deltas[1][0].seq, 102);
1222
1223        // Check all are Add operations
1224        assert!(matches!(deltas[0][0].op, DeltaOp::Add));
1225        assert!(matches!(deltas[1][0].op, DeltaOp::Add));
1226    }
1227
1228    #[test]
1229    fn test_pending_edges_to_delta_empty() {
1230        let edges: Vec<Vec<PendingEdge>> = vec![];
1231        let (deltas, final_seq) = pending_edges_to_delta(&edges, 0);
1232        assert!(deltas.is_empty());
1233        assert_eq!(final_seq, 0);
1234    }
1235}