Skip to main content

sqry_core/graph/unified/build/
entrypoint.rs

1//! Build entrypoint for unified graph.
2//!
3//! This module provides the top-level API for building a unified graph from source files.
4//! It orchestrates file discovery and delegates to the 5-pass build pipeline.
5
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::time::{Duration, Instant};
9
10use anyhow::{Context, Result};
11use ignore::WalkBuilder;
12use rayon::prelude::*;
13
14use crate::graph::GraphBuilderError;
15use crate::graph::error::GraphResult;
16use crate::graph::unified::analysis::LabelBudgetConfig;
17use crate::graph::unified::analysis::ReachabilityStrategy;
18use crate::graph::unified::build::StagingGraph;
19use crate::graph::unified::build::cancellation::CancellationToken;
20use crate::graph::unified::build::parallel_commit::{
21    GlobalOffsets, phase2_assign_ranges, phase3_parallel_commit, phase4_apply_global_remap,
22    phase4c_prime_unify_cross_file_nodes, phase4d_bulk_insert_edges,
23};
24use crate::graph::unified::build::pass3_intra::PendingEdge;
25use crate::graph::unified::build::progress::GraphBuildProgressTracker;
26use crate::graph::unified::concurrent::CodeGraph;
27use crate::io::FileReader;
28use crate::plugin::PluginManager;
29use crate::plugin::error::ParseError;
30use crate::plugin::{SafeParser, SafeParserConfig};
31use crate::progress::{SharedReporter, no_op_reporter};
32use crate::project::path_utils::normalize_path_components;
33
34/// Result of a successful build-and-persist operation.
35///
36/// Contains all metadata about the completed graph build, including
37/// canonical (deduplicated) edge counts, file counts by language, and
38/// provenance information.
39#[derive(Debug, Clone)]
40pub struct BuildResult {
41    /// Number of nodes in the graph.
42    pub node_count: usize,
43    /// Number of deduplicated edges (from analysis CSR, after merge/compaction).
44    /// This is the canonical edge count.
45    pub edge_count: usize,
46    /// Number of raw edges in the graph (CSR + delta buffer, before dedup).
47    /// Available for diagnostics; NOT the canonical count.
48    pub raw_edge_count: usize,
49    /// Number of indexed files, by language (e.g., `{"rust": 150, "python": 30}`).
50    ///
51    /// Counts files that entered the graph indexing pipeline and were
52    /// successfully parsed by a plugin. Not the same as "scanned files"
53    /// (all files walked by the directory scanner).
54    pub file_count: std::collections::HashMap<String, usize>,
55    /// Total number of indexed files.
56    pub total_files: usize,
57    /// ISO 8601 timestamp when the build completed.
58    pub built_at: String,
59    /// Root path that was indexed.
60    pub root_path: String,
61    /// Number of threads used for parallel file processing.
62    ///
63    /// Reflects the effective thread count from the rayon pool, not the
64    /// CLI-requested value. Useful for build diagnostics.
65    pub thread_count: usize,
66
67    /// Deterministic ordered built-in plugin ids active during the build.
68    pub active_plugin_ids: Vec<String>,
69
70    /// Reachability strategy used by each persisted analysis kind.
71    pub analysis_strategies: Vec<AnalysisStrategySummary>,
72}
73
74/// Persisted analysis strategy summary for one edge kind.
75#[derive(Debug, Clone, PartialEq, Eq)]
76pub struct AnalysisStrategySummary {
77    /// Stable edge-kind label (`calls`, `imports`, `references`, `inherits`).
78    pub edge_kind: &'static str,
79    /// Reachability strategy persisted for the edge kind.
80    pub strategy: ReachabilityStrategy,
81}
82
/// Fallback per-batch staging watermark: 512 MB.
///
/// Once the accumulated `StagingGraph` memory passes this threshold, the
/// current batch is committed before the next chunk is parsed. It can be
/// overridden through `SQRY_STAGING_MEMORY_LIMIT_MB` or
/// [`BuildConfig::staging_memory_limit`].
const DEFAULT_STAGING_MEMORY_LIMIT: usize = 512 << 20;
89
90/// Configuration for building the unified graph.
91#[derive(Debug, Clone)]
92pub struct BuildConfig {
93    /// Maximum directory depth to traverse (None = unlimited).
94    pub max_depth: Option<usize>,
95
96    /// Follow symbolic links.
97    pub follow_links: bool,
98
99    /// Include hidden files and directories.
100    pub include_hidden: bool,
101
102    /// Number of threads for parallel building (None = use default based on CPU count).
103    pub num_threads: Option<usize>,
104
105    /// Maximum staging memory (bytes) to accumulate before committing a batch.
106    ///
107    /// Controls the parse-commit chunking watermark. When the sum of all
108    /// in-flight `StagingGraph` buffers exceeds this limit, the batch is
109    /// committed to the graph before the next chunk of files is parsed.
110    ///
111    /// Defaults to 512 MB. Override via
112    /// `SQRY_STAGING_MEMORY_LIMIT_MB` environment variable.
113    pub staging_memory_limit: usize,
114
115    /// Configuration for the 2-hop label budget used during analysis.
116    ///
117    /// Controls the maximum number of intervals per edge kind and what
118    /// to do when the budget is exceeded (fail or degrade to BFS).
119    pub label_budget: LabelBudgetConfig,
120}
121
122impl Default for BuildConfig {
123    fn default() -> Self {
124        let limit = std::env::var("SQRY_STAGING_MEMORY_LIMIT_MB")
125            .ok()
126            .and_then(|v| v.parse::<usize>().ok())
127            .map_or(DEFAULT_STAGING_MEMORY_LIMIT, |mb| mb * 1024 * 1024);
128
129        let label_budget = LabelBudgetConfig {
130            budget_per_kind: 15_000_000,
131            on_exceeded: crate::graph::unified::analysis::BudgetExceededPolicy::Degrade,
132            density_gate_threshold: 64,
133            skip_labels: false,
134        };
135
136        Self {
137            max_depth: None,
138            follow_links: false,
139            include_hidden: false,
140            num_threads: None,
141            staging_memory_limit: limit,
142            label_budget,
143        }
144    }
145}
146
147/// Create a rayon thread pool sized by `BuildConfig::num_threads`.
148fn create_thread_pool(config: &BuildConfig) -> Result<rayon::ThreadPool> {
149    let mut builder = rayon::ThreadPoolBuilder::new();
150    if let Some(n) = config.num_threads {
151        builder = builder.num_threads(n);
152    }
153    builder
154        .build()
155        .context("Failed to create rayon thread pool for parallel indexing")
156}
157
158/// Compute chunk boundaries for memory-bounded parallel parse batches.
159///
160/// Splits `files` into non-overlapping ranges where each chunk's estimated
161/// staging memory stays within `memory_limit`. Uses source file size as a
162/// proxy for staging buffer size (multiplied by an expansion factor to
163/// account for AST node/edge/string overhead).
164///
165/// Returns at least one chunk even if the first file alone exceeds the limit.
166fn compute_parse_chunks(
167    files: &[PathBuf],
168    _pool: &rayon::ThreadPool,
169    _plugins: &PluginManager,
170    memory_limit: usize,
171) -> Vec<std::ops::Range<usize>> {
172    // Expansion factor: staging buffers are typically 2-8x the source file
173    // size due to AST nodes, edges, and interned strings. Use 4x as a
174    // conservative middle ground.
175    const EXPANSION_FACTOR: usize = 4;
176
177    let mut chunks = Vec::new();
178    let mut chunk_start = 0;
179    let mut chunk_estimate = 0usize;
180
181    for (i, path) in files.iter().enumerate() {
182        #[allow(clippy::cast_possible_truncation)] // File sizes always fit usize on 32/64-bit.
183        let file_size = std::fs::metadata(path)
184            .map(|m| m.len() as usize)
185            .unwrap_or(0);
186        let estimated_staging = file_size * EXPANSION_FACTOR;
187
188        // If adding this file would exceed the limit and we already have
189        // files in the chunk, finalize the current chunk first.
190        if chunk_estimate + estimated_staging > memory_limit && i > chunk_start {
191            chunks.push(chunk_start..i);
192            chunk_start = i;
193            chunk_estimate = 0;
194        }
195        chunk_estimate += estimated_staging;
196    }
197
198    // Final chunk (always push — handles single-chunk and trailing files)
199    if chunk_start < files.len() {
200        chunks.push(chunk_start..files.len());
201    }
202
203    if chunks.len() > 1 {
204        log::info!(
205            "Memory-bounded chunking: {} batches for {} files (limit: {} MB)",
206            chunks.len(),
207            files.len(),
208            memory_limit / (1024 * 1024),
209        );
210    }
211
212    chunks
213}
214
/// Name of the file-processing phase of a graph build.
pub const GRAPH_FILE_PROCESSING_PHASE: &str = "File processing";
217
218/// Build a unified graph from source files.
219///
220/// This function:
221/// 1. Walks the file tree starting at `root`
222/// 2. For each file, extracts symbols using the appropriate language plugin
223/// 3. Runs the 5-pass build pipeline to populate the graph
224/// 4. Returns the completed `CodeGraph`
225///
226/// # Arguments
227///
228/// * `root` - Root directory to scan for source files
229/// * `plugins` - Plugin manager for language-specific extraction
230/// * `config` - Build configuration
231///
232/// # Returns
233///
234/// A `CodeGraph` containing the populated graph.
235///
236/// # Errors
237///
238/// Returns an error if:
239/// - The root path does not exist
240/// - No graph builders are registered
241/// - All eligible files fail to build (per-file failures are logged and skipped)
242///
243/// # Example
244///
245/// ```ignore
246/// use sqry_core::graph::unified::build::{build_unified_graph, BuildConfig};
247/// use sqry_core::plugin::PluginManager;
248/// use std::path::Path;
249///
250/// let plugins = sqry_plugin_registry::create_plugin_manager();
251/// let config = BuildConfig::default();
252/// let graph = build_unified_graph(Path::new("src"), &plugins, &config)?;
253/// println!("Created graph with {} nodes", graph.node_count());
254/// ```
255pub fn build_unified_graph(
256    root: &Path,
257    plugins: &PluginManager,
258    config: &BuildConfig,
259) -> Result<CodeGraph> {
260    build_unified_graph_cancellable(root, plugins, config, &CancellationToken::default())
261        .map_err(anyhow::Error::from)
262}
263
264/// Build a unified graph from source files with progress reporting.
265///
266/// This is the same as [`build_unified_graph`] but accepts a progress reporter
267/// for tracking build progress.
268///
269/// # Arguments
270///
271/// * `root` - Root directory to scan for source files
272/// * `plugins` - Plugin manager for language-specific extraction
273/// * `config` - Build configuration
274/// * `progress` - Progress reporter for build status updates
275///
276/// # Returns
277///
278/// A `CodeGraph` containing the populated graph.
279///
280/// # Errors
281///
282/// Returns an error if the path is missing, no graph builders are registered,
283/// or all eligible files fail to build.
284pub fn build_unified_graph_with_progress(
285    root: &Path,
286    plugins: &PluginManager,
287    config: &BuildConfig,
288    progress: SharedReporter,
289) -> Result<(CodeGraph, usize)> {
290    build_unified_graph_with_progress_cancellable(
291        root,
292        plugins,
293        config,
294        progress,
295        &CancellationToken::default(),
296    )
297    .map_err(anyhow::Error::from)
298}
299
300/// Build a unified graph with cooperative cancellation.
301///
302/// Behaves identically to [`build_unified_graph`] except that the
303/// `cancellation` token is polled at every pass boundary. A cancelled
304/// token causes the pipeline to return [`GraphBuilderError::Cancelled`]
305/// at the next boundary.
306///
307/// Used by the sqryd daemon's rebuild dispatcher to abort in-flight
308/// full rebuilds when a workspace is evicted mid-build.
309///
310/// # Errors
311///
312/// Returns [`GraphBuilderError::Cancelled`] if the token is cancelled
313/// at any pass boundary; otherwise the same error modes as
314/// [`build_unified_graph`] (lifted from `anyhow::Error` into
315/// [`GraphBuilderError::Internal`]).
316pub fn build_unified_graph_cancellable(
317    root: &Path,
318    plugins: &PluginManager,
319    config: &BuildConfig,
320    cancellation: &CancellationToken,
321) -> GraphResult<CodeGraph> {
322    let (graph, _effective_threads) =
323        build_unified_graph_inner(root, plugins, config, no_op_reporter(), cancellation)?;
324    Ok(graph)
325}
326
327/// Build a unified graph with cooperative cancellation AND a progress
328/// reporter.
329///
330/// Combines [`build_unified_graph_cancellable`] + the progress
331/// reporter variant.
332///
333/// # Errors
334///
335/// Same as [`build_unified_graph_cancellable`].
336pub fn build_unified_graph_with_progress_cancellable(
337    root: &Path,
338    plugins: &PluginManager,
339    config: &BuildConfig,
340    progress: SharedReporter,
341    cancellation: &CancellationToken,
342) -> GraphResult<(CodeGraph, usize)> {
343    build_unified_graph_inner(root, plugins, config, progress, cancellation)
344}
345
346/// Internal implementation that returns the effective thread count alongside the graph.
347///
348/// Used by [`build_and_persist_graph_with_progress`] to propagate the thread count
349/// into `BuildResult` without exposing it in the public API.
350///
351/// Accepts a [`CancellationToken`] which is polled at every pass
352/// boundary. Callers that do not need cancellation pass
353/// `&CancellationToken::default()` (via the `build_unified_graph` +
354/// `build_unified_graph_with_progress` wrappers).
///
/// # Returns
///
/// The populated graph paired with the thread count reported by the build's
/// rayon pool (`pool.current_num_threads()`).
///
/// # Errors
///
/// Propagates the error from `cancellation.check()` at each boundary, and
/// returns [`GraphBuilderError::Internal`] when the root path does not exist,
/// no plugin exposes a graph builder, the thread pool cannot be created, file
/// registration or node/string range allocation fails, the Phase 3 written
/// counts disagree with the plan, or at least one file was attempted and none
/// was committed.
355#[allow(clippy::too_many_lines)] // Complex 5-pass build pipeline requires sequential flow
356fn build_unified_graph_inner(
357    root: &Path,
358    plugins: &PluginManager,
359    config: &BuildConfig,
360    progress: SharedReporter,
361    cancellation: &CancellationToken,
362) -> GraphResult<(CodeGraph, usize)> {
363    if !root.exists() {
364        return Err(GraphBuilderError::Internal {
365            reason: format!("Path {} does not exist", root.display()),
366        });
367    }
368
369    log::info!(
370        "Building unified graph from source files in {}",
371        root.display()
372    );
373
374    // 7c cancellation boundary 1: pre-build, after arg validation.
375    cancellation.check()?;
376
377    let has_graph_builders = plugins
378        .plugins()
379        .iter()
380        .any(|plugin| plugin.graph_builder().is_some());
381    if !has_graph_builders {
382        return Err(GraphBuilderError::Internal {
383            reason: "No graph builders registered – cannot build code graph".to_string(),
384        });
385    }
386
387    // Create progress tracker for this build
388    let tracker = GraphBuildProgressTracker::new(progress);
389
390    // 1. Find source files
391    let mut files = find_source_files(root, config);
392    sort_files_for_build(root, &mut files);
393
394    // 7c cancellation boundary 2: after file discovery, before thread
395    // pool creation + graph allocation.
396    cancellation.check()?;
397
398    // 2. Create the unified graph
399    let mut graph = CodeGraph::new();
400
401    // 3. Create scoped thread pool for parallel parse
402    let pool = create_thread_pool(config).map_err(|e| GraphBuilderError::Internal {
403        reason: format!("thread pool: {e}"),
404    })?;
405    let effective_threads = pool.current_num_threads();
406    log::info!("Parallel indexing: using {effective_threads} threads");
407
408    // Chunked parallel-parse / parallel-commit pipeline.
409    //
410    // Files are processed in memory-bounded batches (chunks). Each chunk:
411    //   Phase 1: Parse files in parallel (rayon thread pool)
412    //   Phase 2: Count + prefix-sum range assignment
413    //   Phase 3: Parallel commit into disjoint pre-allocated arena/interner ranges
414    //   Phase 4: After ALL chunks — string dedup, global remap, index build, edge bulk insert
415    //
416    // The batch boundary is determined by `staging_memory_limit`: once the
417    // accumulated staging buffer size exceeds the watermark, the current
418    // batch is committed before more files are parsed. This prevents OOM
419    // on large repositories where holding all StagingGraphs simultaneously
420    // would exhaust available RAM.
421    let total_files = files.len();
422    tracker.start_phase(
423        1,
424        "Chunked structural indexing (parse -> range-plan -> semantic commit)",
425        total_files,
426    );
427
428    let (mut succeeded, mut parse_errors, mut skipped, mut timed_out) =
429        (0usize, 0usize, 0usize, 0usize);
430    let mut total_staging_bytes = 0usize;
431    let mut peak_chunk_staging_bytes = 0usize;
432    let mut max_file_staging_bytes = 0usize;
433
434    // Global offsets track running positions across chunks.
435    // For a fresh graph: node arena starts at 0 slots, string interner at 1 (sentinel).
    // NOTE(review): the zero-length `alloc_range(0)` appears to be used only to
    // read the interner's current offset, falling back to 1 on error — confirm
    // that a zero-length allocation has no side effects.
436    let initial_string_offset = graph.strings_mut().alloc_range(0).unwrap_or(1);
437    let mut offsets = GlobalOffsets {
438        node_offset: u32::try_from(graph.nodes().slot_count()).unwrap_or(0),
439        string_offset: initial_string_offset,
440    };
441    // Collect all edges across chunks for Phase 4 bulk insert.
442    let mut all_edges: Vec<Vec<PendingEdge>> = Vec::new();
443
444    let chunks = compute_parse_chunks(&files, &pool, plugins, config.staging_memory_limit);
445    for chunk_range in chunks {
446        // 7c cancellation boundary 3: top of each chunk iteration.
447        cancellation.check()?;
448
449        let chunk_files = &files[chunk_range];
450
451        // 7c test hook: observation point fired at the top of each
452        // chunk. Tests that need to flip the cancellation token
453        // between chunks register a callback here. Production builds
454        // compile this call out entirely.
455        #[cfg(any(test, feature = "rebuild-internals"))]
456        testing::fire_after_chunk_hook(cancellation);
457
458        // Phase 1: Parallel parse this chunk
459        let staged_results: Vec<(PathBuf, Result<ParsedFileOutcome>)> = pool.install(|| {
460            chunk_files
461                .par_iter()
462                .map(|path| {
463                    let result = parse_file(path.as_path(), plugins);
464                    tracker.increment_progress();
465                    (path.clone(), result)
466                })
467                .collect()
468        });
469
470        // Separate successful parses from errors/skips
471        let mut chunk_parsed: Vec<(PathBuf, ParsedFile)> = Vec::new();
472        let mut chunk_staging_bytes = 0usize;
473        for (path, result) in staged_results {
474            match result {
475                Ok(ParsedFileOutcome::Parsed(parsed)) => {
476                    let file_bytes = parsed.staging.estimated_byte_size();
477                    total_staging_bytes += file_bytes;
478                    chunk_staging_bytes += file_bytes;
479                    if file_bytes > max_file_staging_bytes {
480                        max_file_staging_bytes = file_bytes;
481                    }
482                    chunk_parsed.push((path, parsed));
483                }
484                Ok(ParsedFileOutcome::Skipped) => skipped += 1,
485                Ok(ParsedFileOutcome::TimedOut {
486                    file,
487                    phase,
488                    timeout_ms,
489                }) => {
490                    timed_out += 1;
491                    log::warn!(
492                        "Timed out building graph for {} during {} after {} ms",
493                        file.display(),
494                        phase,
495                        timeout_ms,
496                    );
497                }
498                Err(e) => {
499                    parse_errors += 1;
500                    log::warn!("Failed to parse {}: {e}", path.display());
501                }
502            }
503        }
504        if chunk_staging_bytes > peak_chunk_staging_bytes {
505            peak_chunk_staging_bytes = chunk_staging_bytes;
506        }
507
        // No file in this chunk parsed successfully, so there is nothing to
        // register or commit; move on to the next chunk.
508        if chunk_parsed.is_empty() {
509            continue;
510        }
511
512        // Register files in batch
513        let file_info: Vec<_> = chunk_parsed
514            .iter()
515            .map(|(path, parsed)| (path.clone(), Some(parsed.language)))
516            .collect();
517        let file_ids = graph.files_mut().register_batch(&file_info).map_err(|e| {
518            GraphBuilderError::Internal {
519                reason: format!("Failed to register files: {e}"),
520            }
521        })?;
522
523        // Phase 2: Count + range assignment (fast, no progress needed)
524        let staging_refs: Vec<_> = chunk_parsed.iter().map(|(_, p)| &p.staging).collect();
525        let plan = phase2_assign_ranges(&staging_refs, &file_ids, &offsets);
526
527        // Pre-allocate arena and interner ranges for Phase 3.
528        let placeholder = crate::graph::unified::storage::NodeEntry::new(
529            crate::graph::unified::node::NodeKind::Other,
530            crate::graph::unified::string::StringId::new(0),
531            crate::graph::unified::file::FileId::new(0),
532        );
533        graph
534            .nodes_mut()
535            .alloc_range(plan.total_nodes, &placeholder)
536            .map_err(|e| GraphBuilderError::Internal {
537                reason: format!("Failed to alloc node range: {e:?}"),
538            })?;
539        graph
540            .strings_mut()
541            .alloc_range(plan.total_strings)
542            .map_err(|e| GraphBuilderError::Internal {
543                reason: format!("Failed to alloc string range: {e}"),
544            })?;
545
546        // Phase 3: Parallel commit into disjoint pre-allocated ranges.
547        // Use pool.install to respect BuildConfig::num_threads for rayon par_iter.
548        //
549        // `phase3_parallel_commit` is generic over
550        // `G: GraphMutationTarget` as of Task 4 Step 4 Phase 1; here
551        // the inferred `G` is `CodeGraph`, and the helper reaches the
552        // arena + interner via `graph.nodes_and_strings_mut()`
553        // internally.
554        let phase3 = pool.install(|| phase3_parallel_commit(&plan, &staging_refs, &mut graph));
555
556        // Validate written counts match plan. A mismatch indicates a bug in
557        // StagingGraph counting — abort the build to prevent phantom entries
558        // and inconsistent file registry state.
559        let expected_nodes = plan.total_nodes as usize;
560        let expected_strings = plan.total_strings as usize;
561        let expected_edges = usize::try_from(plan.total_edges)
562            .unwrap_or_else(|_| unreachable!("edge count does not fit usize"));
563        if phase3.total_nodes_written != expected_nodes
564            || phase3.total_strings_written != expected_strings
565            || phase3.total_edges_collected != expected_edges
566        {
567            return Err(GraphBuilderError::Internal {
568                reason: format!(
569                    "Phase 3 count mismatch: nodes {}/{expected_nodes}, strings {}/{expected_strings}, edges {}/{expected_edges}. This indicates a bug in StagingGraph counting.",
570                    phase3.total_nodes_written,
571                    phase3.total_strings_written,
572                    phase3.total_edges_collected,
573                ),
574            });
575        }
576
577        // Populate FileSegmentTable from the chunk's file plans.
578        for fp in &plan.file_plans {
579            let start = fp.node_range.start;
580            let count = fp.node_range.end.saturating_sub(start);
581            graph
582                .file_segments_mut()
583                .record_range(fp.file_id, start, count);
584        }
585
586        // Populate FileRegistry::per_file_nodes from Phase 3's
587        // committed-NodeId vectors. This is the Gate 0c iter-2 B2 fix
588        // (pulled base-plan Step 1 forward): each NodeId committed by
589        // parallel-parse is bucketed by its owning FileId so the
590        // bucket-bijection debug invariant at publish time can verify
591        // arena ↔ bucket consistency against real data instead of a
592        // vacuously-empty map.
593        //
594        // Iteration order matches `plan.file_plans`, which is
595        // deterministic across runs. `per_file_node_ids[i]` is the
596        // set of NodeIds committed for `plan.file_plans[i]`; the
597        // registry's `record_node` is O(1) amortised per call.
598        debug_assert_eq!(
599            phase3.per_file_node_ids.len(),
600            plan.file_plans.len(),
601            "phase3 per-file node ID vector length must match plan length"
602        );
603        for (fp, node_ids) in plan.file_plans.iter().zip(phase3.per_file_node_ids.iter()) {
604            for nid in node_ids {
605                graph.files_mut().record_node(fp.file_id, *nid);
606            }
607        }
608
609        succeeded += chunk_parsed.len();
610
611        // Merge confidence metadata from parsed files
612        for (_path, parsed) in &mut chunk_parsed {
613            if let Some(confidence) = parsed.staging.take_confidence() {
614                let language_name = parsed.language.to_string();
615                graph.merge_confidence(&language_name, confidence);
616            }
617        }
618
619        // Update global offsets for next chunk
620        offsets.node_offset += plan.total_nodes;
621        offsets.string_offset += plan.total_strings;
622
623        // 7c cancellation boundary 4: after chunk commit, before
624        // accumulating edges for Phase 4.
625        cancellation.check()?;
626
627        // Accumulate edges for Phase 4
628        all_edges.extend(phase3.per_file_edges);
629    }
630    tracker.complete_phase();
631
632    // 7c test hook: observation point fired after the chunk loop exits
633    // and before Phase 4 finalization. Tests that need to flip the
634    // cancellation token at this boundary register a callback here.
635    #[cfg(any(test, feature = "rebuild-internals"))]
636    testing::fire_before_phase4_hook(cancellation);
637
638    // Phase 4: Post-chunk finalization
    // NOTE(review): the phase is started with a total of 5, but only four
    // `increment_progress()` calls run before `complete_phase()` below —
    // confirm whether the fifth step is intentional.
639    tracker.start_phase(4, "Finalizing graph", 5);
640
641    // 7c cancellation boundary 5: pre-Phase-4a.
642    cancellation.check()?;
643
644    // Phase 4a: Global string dedup
645    let string_remap = graph.strings_mut().build_dedup_table();
646    if !string_remap.is_empty() {
647        log::debug!(
648            "Phase 4a: dedup removed {} duplicate string(s)",
649            string_remap.len()
650        );
651
652        // Phase 4b: Apply dedup remap to all nodes and pending edges
653        phase4_apply_global_remap(graph.nodes_mut(), &mut all_edges, &string_remap);
654    }
655    tracker.increment_progress(); // 4a+4b done
656
657    // 7c cancellation boundary 6: pre-Phase-4c (rebuild_indices).
658    cancellation.check()?;
659
660    // Phase 4c: Build indices from finalized arena.
661    // Uses build_from_arena() which is O(n log n) — no per-element duplicate check.
662    graph.rebuild_indices();
663    tracker.increment_progress(); // 4c done
664
665    // 7c cancellation boundary 7: pre-Phase-4c-prime
666    // (phase4c_prime_unify_cross_file_nodes).
667    cancellation.check()?;
668
669    // Phase 4c-prime: Cross-file node unification.
670    // Walk the arena for nodes sharing a qualified name and a call-compatible kind,
671    // merge duplicates into a single canonical node, and rewrite PendingEdge targets.
672    // Must run AFTER rebuild_indices (uses by_qualified_name) and BEFORE Phase 4d
673    // (operates on PendingEdge, not committed DeltaEdge).
674    let unification_stats = phase4c_prime_unify_cross_file_nodes(&mut graph, &mut all_edges);
675    if unification_stats.nodes_merged > 0 {
676        log::info!(
677            "Phase 4c-prime: unified {} duplicate nodes ({} candidate groups examined, \
678             {} edges rewritten, {} ms)",
679            unification_stats.nodes_merged,
680            unification_stats.candidate_pairs_examined,
681            unification_stats.edges_rewritten,
682            unification_stats.elapsed_ms,
683        );
684        // 7c cancellation boundary 7b: post-4c-prime, before the
685        // optional second rebuild_indices. Codex iter-0 MAJOR: without
686        // this check, a cancellation observed after the unification
687        // walk still pays another O(n log n) index rebuild.
688        cancellation.check()?;
689        // Rebuild indices after tombstoning loser nodes
690        graph.rebuild_indices();
691    }
692    tracker.increment_progress(); // 4c-prime done
693
694    // 7c cancellation boundary 8: pre-Phase-4d (bulk edge insert).
695    cancellation.check()?;
696
697    // Phase 4d: Bulk insert edges via deterministic DeltaEdge conversion.
698    // Wraps the pure pending_edges_to_delta + add_edges_bulk_ordered pair
699    // behind phase4d_bulk_insert_edges so the incremental rebuild path
700    // (Task 4 Step 4 Phase 3) can reuse the same helper against a
701    // RebuildGraph. The helper carries forward the edge store's current
702    // seq counter so non-empty graphs advance deterministically.
703    let _final_edge_seq = phase4d_bulk_insert_edges(&mut graph, &all_edges);
704    tracker.increment_progress(); // 4d done
705    tracker.complete_phase();
706
707    log::info!(
708        "Parallel indexing complete: {succeeded} committed, {skipped} skipped, \
709         {timed_out} timed out, {parse_errors} parse errors, \
710         ~{} MB total staged, ~{} MB peak chunk (max single file: ~{} KB)",
711        total_staging_bytes / (1024 * 1024),
712        peak_chunk_staging_bytes / (1024 * 1024),
713        max_file_staging_bytes / 1024,
714    );
715
    // Skipped files are not counted as attempts; only committed, errored,
    // and timed-out files are.
716    let attempted = succeeded + parse_errors + timed_out;
717
718    if attempted == 0 {
719        log::warn!(
720            "No eligible source files found for graph build in {}",
721            root.display()
722        );
723    }
724
    // Fail only when at least one file was attempted and none was committed;
    // with zero attempts the build continues after the warning above.
725    if attempted > 0 && succeeded == 0 {
726        return Err(GraphBuilderError::Internal {
727            reason: "All graph builds failed".to_string(),
728        });
729    }
730
731    // 7c cancellation boundary 9: pre-Phase-4e (binding plane).
732    cancellation.check()?;
733
734    // ------------------------------------------------------------------
735    // Phase 4e — Binding plane derivation.
736    //
737    // Runs between Phase 4d (bulk edge insert) and Pass 5 (cross-language
738    // linking). Consumes only the language-local edge kinds Contains,
739    // Defines, Imports, Exports. Populates CodeGraph::scope_arena (P2U03),
740    // CodeGraph::alias_table (P2U04), CodeGraph::shadow_table (P2U05), and
741    // CodeGraph::scope_provenance_store (P2U11) in one pass.
742    // ------------------------------------------------------------------
743    tracker.start_phase(5, "Binding plane derivation", 1);
744    let binding_stats = super::phase4e_binding::derive_binding_plane(&mut graph);
745    log::info!(
746        target: "sqry_core::build",
747        "Phase 4e: {} scopes, {} aliases, {} shadows derived",
748        binding_stats.scopes,
749        binding_stats.aliases,
750        binding_stats.shadows,
751    );
752    tracker.increment_progress();
753    tracker.complete_phase();
754
755    // 7c test hook: observation point fired before Pass 5. Tests that
756    // need to flip the cancellation token at this boundary register a
757    // callback here (fires BEFORE the check below so a hook that flips
758    // the token is observed by the subsequent check).
759    #[cfg(any(test, feature = "rebuild-internals"))]
760    testing::fire_before_pass5_hook(cancellation);
761
762    // 7c cancellation boundary 10: pre-Pass-5 (cross-language linking).
763    cancellation.check()?;
764
765    // Pass 5: Cross-language linking (FFI declarations → C/C++ functions, HTTP requests → endpoints)
766    tracker.start_phase(6, "Cross-language linking", 1);
767    let pass5_stats = super::pass5_cross_language::link_cross_language_edges(&mut graph);
768    if pass5_stats.total_edges_created > 0 {
769        log::info!(
770            "Pass 5: {} cross-language edges created ({} FFI, {} HTTP)",
771            pass5_stats.total_edges_created,
772            pass5_stats.ffi_edges_created,
773            pass5_stats.http_endpoints_matched,
774        );
775    }
776    tracker.increment_progress(); // pass 5 done
777    tracker.complete_phase();
778
779    log::info!("Built unified graph with {} nodes", graph.node_count());
780
781    // Publish-boundary invariants (A2 §F / Task 4 Gate 0d).
782    //
783    // This is the canonical "full rebuild end" call site named in plan
784    // §F.3. Full rebuilds have no tombstoned NodeIds to carry forward,
785    // so the §F.2 residue check does not run here — per plan §H step
786    // 14, the residue check has EXACTLY ONE call site
787    // (`RebuildGraph::finalize` step 14) against the drained tombstone
788    // set. Full rebuilds run the §F.1 bucket bijection only, via
789    // [`crate::graph::unified::publish::assert_publish_bijection`]:
790    // every parallel-commit chunk populates per-file buckets via
791    // `FileRegistry::record_node`, and the bijection proves no file
792    // ended up with a dead / duplicate / misfiled / missing node.
793    //
794    // In release builds the helper is a no-op; see `publish.rs`.
795    super::super::publish::assert_publish_bijection(&graph);
796
797    Ok((graph, effective_threads))
798}
799
800/// Build unified graph, persist snapshot + manifest, and run analysis pipeline.
801///
802/// Convenience wrapper that uses a no-op progress reporter.
803/// See [`build_and_persist_graph_with_progress`] for full documentation.
804///
805/// # Errors
806///
807/// Returns an error if graph building, persistence, or analysis fails.
808pub fn build_and_persist_graph(
809    root: &Path,
810    plugins: &PluginManager,
811    config: &BuildConfig,
812    build_command: &str,
813) -> Result<(CodeGraph, BuildResult)> {
814    build_and_persist_graph_with_progress(
815        root,
816        plugins,
817        config,
818        build_command,
819        inferred_plugin_selection_manifest(plugins),
820        no_op_reporter(),
821    )
822}
823
824fn inferred_plugin_selection_manifest(
825    plugins: &PluginManager,
826) -> Option<crate::graph::unified::persistence::PluginSelectionManifest> {
827    let active_plugin_ids = plugins
828        .plugins()
829        .iter()
830        .map(|plugin| plugin.metadata().id.to_string())
831        .collect::<Vec<_>>();
832    if active_plugin_ids.is_empty() {
833        return None;
834    }
835
836    Some(
837        crate::graph::unified::persistence::PluginSelectionManifest {
838            active_plugin_ids,
839            high_cost_mode: None,
840        },
841    )
842}
843
844/// Persist a pre-built graph and run the analysis pipeline.
845///
846/// This is the persist+analysis portion of
847/// [`build_and_persist_graph_with_progress`], extracted so callers can enrich
848/// the graph between build and persist.
849///
850/// # Errors
851///
852/// Returns an error if persistence or analysis fails.
853#[allow(clippy::too_many_lines, clippy::needless_pass_by_value)]
854pub fn persist_and_analyze_graph(
855    graph: CodeGraph,
856    root: &Path,
857    plugins: &PluginManager,
858    config: &BuildConfig,
859    build_command: &str,
860    plugin_selection: Option<crate::graph::unified::persistence::PluginSelectionManifest>,
861    progress: SharedReporter,
862    effective_threads: usize,
863) -> Result<(CodeGraph, BuildResult)> {
864    use crate::graph::unified::analysis::csr::CsrAdjacency;
865    use crate::graph::unified::analysis::{AnalysisIdentity, GraphAnalyses, compute_node_id_hash};
866    use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
867    use crate::graph::unified::persistence::manifest::write_manifest_bytes_atomic;
868    use crate::graph::unified::persistence::{
869        BuildProvenance, GraphStorage, MANIFEST_SCHEMA_VERSION, Manifest, SNAPSHOT_FORMAT_VERSION,
870        save_to_path,
871    };
872    use crate::progress::IndexProgress;
873    use chrono::Utc;
874    use sha2::{Digest, Sha256};
875
876    // Step 1: Ensure storage directories exist and remove old manifest
877    // Removing the manifest BEFORE writing the new snapshot ensures that
878    // readers see `storage.exists() == false` during the rebuild window.
879    // Without this, an interrupted rebuild (crash after snapshot write but
880    // before manifest write) would leave the old manifest paired with a
881    // new, potentially incompatible snapshot — violating the commit-point
882    // contract.
883    let storage = GraphStorage::new(root);
884    fs::create_dir_all(storage.graph_dir())
885        .with_context(|| format!("Failed to create {}", storage.graph_dir().display()))?;
886
887    if storage.exists() {
888        // Remove old manifest so readers don't see stale readiness.
889        // This MUST succeed before we overwrite the snapshot — otherwise a
890        // crash between snapshot write and manifest write leaves stale
891        // readiness (old manifest + new snapshot).  NotFound is harmless
892        // (race or already cleaned up); any other error is fatal.
893        match fs::remove_file(storage.manifest_path()) {
894            Ok(()) => {}
895            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
896            Err(e) => {
897                return Err(e).with_context(|| {
898                    format!(
899                        "Failed to remove old manifest at {} — rebuild cannot proceed safely",
900                        storage.manifest_path().display()
901                    )
902                });
903            }
904        }
905    }
906
907    // Step 2: Capture raw edge count before compaction changes it
908    let raw_edge_count = graph.edge_count();
909    let node_count = graph.node_count();
910
911    // Step 3: Compact edge stores into CSR before persistence
912    //
913    // The build pipeline inserts all edges into the DeltaBuffer (write-optimized).
914    // Without compaction, the persisted snapshot stores edges in delta, causing
915    // O(N) scans for every edges_from()/edges_to() call on load. Compacting to
916    // CSR gives O(degree) lookups — critical for kernel-scale graphs (22M edges).
917    progress.report(IndexProgress::StageStarted {
918        stage_name: "Compacting edge stores for persistence",
919    });
920    let compaction_start = std::time::Instant::now();
921
922    // Snapshot both edge stores (sequential — holds read locks briefly)
923    let forward_compaction_snapshot = {
924        let forward_store = graph.edges().forward();
925        snapshot_edges(&forward_store, node_count)
926    };
927    let reverse_compaction_snapshot = {
928        let reverse_store = graph.edges().reverse();
929        snapshot_edges(&reverse_store, node_count)
930    };
931
932    // Build both CSRs in parallel (CPU-intensive, no locks held)
933    let (forward_result, reverse_result) = rayon::join(
934        || build_compacted_csr(&forward_compaction_snapshot, Direction::Forward),
935        || build_compacted_csr(&reverse_compaction_snapshot, Direction::Reverse),
936    );
937
938    let (forward_csr, _forward_build_stats) =
939        forward_result.context("Failed to build forward CSR for persistence compaction")?;
940    let (reverse_csr, _reverse_build_stats) =
941        reverse_result.context("Failed to build reverse CSR for persistence compaction")?;
942
943    // Drop snapshots — no longer needed
944    drop(forward_compaction_snapshot);
945    drop(reverse_compaction_snapshot);
946
947    // Build analysis adjacency from forward CSR before it's consumed by swap.
948    // This replaces the expensive build_from_snapshot merge+sort (~11s on kernel).
949    let adjacency = CsrAdjacency::from_csr_graph(&forward_csr);
950
951    // Atomic mutation phase: swap both CSRs and clear both deltas
952    graph
953        .edges()
954        .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
955
956    progress.report(IndexProgress::StageCompleted {
957        stage_name: "Compacting edge stores for persistence",
958        stage_duration: compaction_start.elapsed(),
959    });
960
961    // Step 4: Save CSR-backed binary snapshot
962    progress.report(IndexProgress::SavingStarted {
963        component_name: "unified graph",
964    });
965    let save_start = std::time::Instant::now();
966
967    save_to_path(&graph, storage.snapshot_path()).with_context(|| {
968        format!(
969            "Failed to save snapshot to {}",
970            storage.snapshot_path().display()
971        )
972    })?;
973
974    progress.report(IndexProgress::SavingCompleted {
975        component_name: "unified graph",
976        save_duration: save_start.elapsed(),
977    });
978
979    // Step 5: Compute snapshot checksum
980    let snapshot_content =
981        fs::read(storage.snapshot_path()).context("Failed to read snapshot for checksum")?;
982    let snapshot_sha256 = hex::encode(Sha256::digest(&snapshot_content));
983
984    // Step 6: Build full analyses from the prebuilt adjacency.
985    // CsrAdjacency was already derived from the forward CsrGraph in Step 4,
986    // eliminating the expensive re-merge from CompactionSnapshot.
987    progress.report(IndexProgress::StageStarted {
988        stage_name: "Computing graph analyses",
989    });
990    let analysis_start = std::time::Instant::now();
991
992    let analyses = if let Some(thread_count) = config.num_threads {
993        rayon::ThreadPoolBuilder::new()
994            .num_threads(thread_count)
995            .build()
996            .context("Failed to create rayon thread pool for graph analysis")?
997            .install(|| {
998                GraphAnalyses::build_all_from_adjacency_with_budget(adjacency, &config.label_budget)
999            })
1000    } else {
1001        GraphAnalyses::build_all_from_adjacency_with_budget(adjacency, &config.label_budget)
1002    }
1003    .context("Failed to build graph analyses")?;
1004
1005    progress.report(IndexProgress::StageCompleted {
1006        stage_name: "Computing graph analyses",
1007        stage_duration: analysis_start.elapsed(),
1008    });
1009
1010    let dedup_edge_count = analyses.adjacency.edge_count as usize;
1011
1012    let analysis_strategies = vec![
1013        AnalysisStrategySummary {
1014            edge_kind: "calls",
1015            strategy: analyses.cond_calls.strategy,
1016        },
1017        AnalysisStrategySummary {
1018            edge_kind: "imports",
1019            strategy: analyses.cond_imports.strategy,
1020        },
1021        AnalysisStrategySummary {
1022            edge_kind: "references",
1023            strategy: analyses.cond_references.strategy,
1024        },
1025        AnalysisStrategySummary {
1026            edge_kind: "inherits",
1027            strategy: analyses.cond_inherits.strategy,
1028        },
1029    ];
1030
1031    // Step 7: Count workspace files by language using plugin detection
1032    let mut file_counts: std::collections::HashMap<String, usize> =
1033        std::collections::HashMap::new();
1034    for (file_id, file_path) in graph.indexed_files() {
1035        if graph.files().is_external(file_id) {
1036            continue;
1037        }
1038        let language = plugins
1039            .plugin_for_path(file_path)
1040            .map_or_else(|| "unknown".to_string(), |p| p.metadata().id.to_string());
1041        *file_counts.entry(language).or_insert(0) += 1;
1042    }
1043    let total_files: usize = file_counts.values().sum();
1044
1045    // Step 8: Construct Manifest in memory (with dedup edge count from analysis)
1046    let built_at = Utc::now().to_rfc3339();
1047
1048    let manifest = Manifest {
1049        schema_version: MANIFEST_SCHEMA_VERSION,
1050        snapshot_format_version: SNAPSHOT_FORMAT_VERSION,
1051        built_at: built_at.clone(),
1052        root_path: root.to_string_lossy().to_string(),
1053        node_count,
1054        edge_count: dedup_edge_count,
1055        raw_edge_count: Some(raw_edge_count),
1056        snapshot_sha256,
1057        build_provenance: BuildProvenance {
1058            sqry_version: env!("CARGO_PKG_VERSION").to_string(),
1059            build_timestamp: built_at.clone(),
1060            build_command: build_command.to_string(),
1061            plugin_hashes: std::collections::HashMap::default(),
1062        },
1063        file_count: file_counts.clone(),
1064        languages: Vec::default(),
1065        config: std::collections::HashMap::default(),
1066        confidence: graph.confidence().clone(),
1067        last_indexed_commit: get_git_head_commit(root),
1068        plugin_selection: plugin_selection.clone(),
1069    };
1070
1071    // Step 9: Serialize manifest to bytes and compute hash
1072    let manifest_bytes =
1073        serde_json::to_vec_pretty(&manifest).context("Failed to serialize manifest")?;
1074
1075    let manifest_hash = {
1076        let mut hasher = Sha256::new();
1077        hasher.update(&manifest_bytes);
1078        hex::encode(hasher.finalize())
1079    };
1080
1081    // Step 10: Construct AnalysisIdentity and persist all analyses
1082    let snapshot = graph.snapshot();
1083    let node_id_hash = compute_node_id_hash(&snapshot);
1084    let identity = AnalysisIdentity::new(manifest_hash, node_id_hash);
1085
1086    fs::create_dir_all(storage.analysis_dir()).with_context(|| {
1087        format!(
1088            "Failed to create analysis directory at {}",
1089            storage.analysis_dir().display()
1090        )
1091    })?;
1092
1093    progress.report(IndexProgress::SavingStarted {
1094        component_name: "graph analyses",
1095    });
1096
1097    analyses
1098        .persist_all(&storage, &identity)
1099        .context("Failed to persist graph analyses")?;
1100
1101    log::info!(
1102        "Graph analyses persisted to {}",
1103        storage.analysis_dir().display()
1104    );
1105
1106    progress.report(IndexProgress::SavingCompleted {
1107        component_name: "graph analyses",
1108        save_duration: analysis_start.elapsed(),
1109    });
1110
1111    // Step 11: Write manifest bytes to disk LAST (commit point)
1112    write_manifest_bytes_atomic(storage.manifest_path(), &manifest_bytes).with_context(|| {
1113        format!(
1114            "Failed to save manifest to {}",
1115            storage.manifest_path().display()
1116        )
1117    })?;
1118
1119    log::info!(
1120        "Manifest saved to {} (dedup edges: {}, raw edges: {})",
1121        storage.manifest_path().display(),
1122        dedup_edge_count,
1123        raw_edge_count
1124    );
1125
1126    let build_result = BuildResult {
1127        node_count,
1128        edge_count: dedup_edge_count,
1129        raw_edge_count,
1130        file_count: file_counts,
1131        total_files,
1132        built_at,
1133        root_path: root.to_string_lossy().to_string(),
1134        thread_count: effective_threads,
1135        active_plugin_ids: plugin_selection
1136            .map_or_else(Vec::new, |selection| selection.active_plugin_ids),
1137        analysis_strategies,
1138    };
1139
1140    Ok((graph, build_result))
1141}
1142
1143/// Build unified graph with progress, persist snapshot + manifest, and run analysis.
1144///
1145/// This is the single entry point for building a complete graph index. It combines:
1146/// 1. Graph building from source files (with progress reporting)
1147/// 2. Snapshot persistence (binary format)
1148/// 3. Analysis pipeline (CSR + SCC + Condensation DAG + labels/fallback) — strict, fails on error
1149/// 4. Manifest creation with deduplicated edge count (JSON metadata, written LAST)
1150///
1151/// The manifest is the "commit point" — written last, only after all other artifacts
1152/// succeed. Consumers check `storage.exists()` (manifest-based) for index readiness.
1153///
1154/// # Arguments
1155///
1156/// * `root` - Root directory to scan for source files
1157/// * `plugins` - Plugin manager for language-specific extraction
1158/// * `config` - Build configuration
1159/// * `build_command` - Provenance string (e.g., `"cli:index"`, `"mcp:rebuild_index"`)
1160/// * `progress` - Progress reporter for build status updates
1161///
1162/// # Errors
1163///
1164/// Returns an error if graph building, persistence, or analysis fails.
1165/// Analysis failure is strict — no fallback to raw edge counts.
1166#[allow(clippy::too_many_lines, clippy::needless_pass_by_value)]
1167pub fn build_and_persist_graph_with_progress(
1168    root: &Path,
1169    plugins: &PluginManager,
1170    config: &BuildConfig,
1171    build_command: &str,
1172    plugin_selection: Option<crate::graph::unified::persistence::PluginSelectionManifest>,
1173    progress: SharedReporter,
1174) -> Result<(CodeGraph, BuildResult)> {
1175    let (graph, effective_threads) = build_unified_graph_inner(
1176        root,
1177        plugins,
1178        config,
1179        progress.clone(),
1180        &CancellationToken::default(),
1181    )
1182    .map_err(anyhow::Error::from)?;
1183    persist_and_analyze_graph(
1184        graph,
1185        root,
1186        plugins,
1187        config,
1188        build_command,
1189        plugin_selection,
1190        progress,
1191        effective_threads,
1192    )
1193}
1194
/// Get the current HEAD commit id from a git repository.
///
/// Runs `git -C <path> rev-parse HEAD` and returns the trimmed output when it
/// is a full hexadecimal object id: 40 digits (SHA-1 repositories) or 64
/// digits (SHA-256 repositories).
///
/// Returns `None` when `git` cannot be spawned, when the command exits
/// unsuccessfully (for example because `path` is not inside a repository), or
/// when the output is not a full hexadecimal object id.
#[must_use]
pub fn get_git_head_commit(path: &Path) -> Option<String> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(path)
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let sha = stdout.trim();

    // Full object ids are 40 hex digits under SHA-1 and 64 under SHA-256.
    let is_full_object_id =
        matches!(sha.len(), 40 | 64) && sha.bytes().all(|byte| byte.is_ascii_hexdigit());

    is_full_object_id.then(|| sha.to_owned())
}
1213
1214/// Find source files in the given directory.
1215///
1216/// Uses the `ignore` crate to respect `.gitignore` files and standard ignore patterns.
1217fn find_source_files(root: &Path, config: &BuildConfig) -> Vec<std::path::PathBuf> {
1218    let mut builder = WalkBuilder::new(root);
1219
1220    builder
1221        .follow_links(config.follow_links)
1222        .hidden(!config.include_hidden)
1223        .git_ignore(true)
1224        .git_global(true)
1225        .git_exclude(true);
1226
1227    if let Some(depth) = config.max_depth {
1228        builder.max_depth(Some(depth));
1229    }
1230
1231    if let Some(threads) = config.num_threads {
1232        builder.threads(threads);
1233    }
1234
1235    let mut files = Vec::new();
1236
1237    for entry in builder.build() {
1238        let entry = match entry {
1239            Ok(entry) => entry,
1240            Err(err) => {
1241                log::warn!("Failed to read directory entry: {err}");
1242                continue;
1243            }
1244        };
1245
1246        if entry.file_type().is_some_and(|ft| ft.is_file()) {
1247            files.push(entry.into_path());
1248        }
1249    }
1250
1251    files
1252}
1253
1254fn sort_files_for_build(root: &Path, files: &mut [PathBuf]) {
1255    let normalized_root = normalize_path_components(root);
1256    files.sort_by(|left, right| {
1257        let left_key = file_sort_key(&normalized_root, left);
1258        let right_key = file_sort_key(&normalized_root, right);
1259        left_key.cmp(&right_key).then_with(|| left.cmp(right))
1260    });
1261}
1262
1263fn file_sort_key(root: &Path, path: &Path) -> String {
1264    let normalized_path = normalize_path_components(path);
1265    let relative = normalized_path
1266        .strip_prefix(root)
1267        .unwrap_or(normalized_path.as_path());
1268    let mut key = relative.to_string_lossy().replace('\\', "/");
1269    if cfg!(windows) {
1270        key = key.to_ascii_lowercase();
1271    }
1272    key
1273}
1274
/// Result of successfully parsing a single file (parallel-safe, no shared state).
///
/// Produced by [`parse_file`] and carried inside
/// [`ParsedFileOutcome::Parsed`].
///
/// `pub(super)` so sibling modules in `crate::graph::unified::build`
/// (specifically [`super::incremental`] from Task 4 Step 4 Phase 3c onward)
/// can construct and consume `ParsedFile` values when driving the
/// parse → commit pipeline against a `RebuildGraph`. The type stays
/// crate-private: external callers still route through the higher-level
/// `build_unified_graph` / `incremental_rebuild` entrypoints.
#[derive(Debug)]
pub(super) struct ParsedFile {
    /// Language identifier for file counting and confidence merging.
    ///
    /// [`parse_file`] fills this from the graph builder's `language()`.
    pub(super) language: crate::graph::Language,
    /// Staged graph operations ready for serial commit.
    ///
    /// [`parse_file`] calls `attach_body_hashes` on it with the raw file
    /// content before returning.
    pub(super) staging: StagingGraph,
}
1290
/// Outcome of [`parse_file`]. `pub(super)` for the same reason as
/// [`ParsedFile`] — shared with [`super::incremental`]'s re-parse closure
/// driver in Phase 3c+. Still crate-private.
///
/// Hard failures (unreadable file, parse error, non-timeout builder error)
/// are not represented here; [`parse_file`] reports those through its
/// `Result` error channel instead.
#[derive(Debug)]
pub(super) enum ParsedFileOutcome {
    /// The file was parsed and its graph operations were staged.
    Parsed(ParsedFile),
    /// The file was not processed: no plugin matched its path, or the
    /// matching plugin provides no graph builder.
    Skipped,
    /// The graph builder reported `GraphBuilderError::BuildTimedOut` for
    /// this file.
    TimedOut {
        /// Path of the file whose graph build timed out.
        file: PathBuf,
        /// Phase name copied from the builder's timeout error.
        phase: &'static str,
        /// Timeout value copied from the builder's timeout error
        /// (milliseconds, going by the field name).
        timeout_ms: u64,
    },
}
1304
1305/// Parse a single file into a `StagingGraph` without touching the shared graph.
1306///
1307/// This function is safe to call from multiple threads — it creates its own
1308/// parser, reads the file, and builds a self-contained staging graph.
1309///
1310/// Returns [`ParsedFileOutcome::Skipped`] if the file has no matching plugin or graph builder.
1311///
1312/// `pub(super)` as of Task 4 Step 4 Phase 3c so the sibling
1313/// [`super::incremental`] module can re-parse closure files against the
1314/// rebuild-local `GraphMutationTarget` plane during `incremental_rebuild`.
1315pub(super) fn parse_file(path: &Path, plugins: &PluginManager) -> Result<ParsedFileOutcome> {
1316    let plugin = plugins.plugin_for_path(path);
1317    let Some(plugin) = plugin else {
1318        return Ok(ParsedFileOutcome::Skipped);
1319    };
1320
1321    let Some(builder) = plugin.graph_builder() else {
1322        return Ok(ParsedFileOutcome::Skipped);
1323    };
1324
1325    let reader =
1326        FileReader::open(path).with_context(|| format!("failed to read {}", path.display()))?;
1327    let raw_content = reader.as_slice();
1328
1329    let safe_parser = SafeParser::new(SafeParserConfig::new().with_max_input_size(
1330        usize::try_from(crate::config::buffers::max_source_file_size()).unwrap_or(usize::MAX),
1331    ));
1332    let prepared_content = plugin.preprocess(raw_content);
1333    let parse_content = prepared_content.as_ref();
1334    let parse_start = Instant::now();
1335    let tree = safe_parser
1336        .parse_file(&plugin.language(), parse_content, path)
1337        .map_err(|err| map_parse_error(path, err))?;
1338    let parse_duration = parse_start.elapsed();
1339    if parse_duration >= Duration::from_secs(2) {
1340        log::warn!("Slow parse ({parse_duration:.2?}): {}", path.display());
1341    }
1342
1343    let mut staging = StagingGraph::new();
1344    let build_start = Instant::now();
1345    match builder.build_graph(&tree, parse_content, path, &mut staging) {
1346        Ok(()) => {}
1347        Err(GraphBuilderError::BuildTimedOut {
1348            phase, timeout_ms, ..
1349        }) => {
1350            return Ok(ParsedFileOutcome::TimedOut {
1351                file: path.to_path_buf(),
1352                phase,
1353                timeout_ms,
1354            });
1355        }
1356        Err(err) => return Err(map_builder_error(path, &err)),
1357    }
1358    let build_duration = build_start.elapsed();
1359    if build_duration >= Duration::from_secs(2) {
1360        log::warn!(
1361            "Slow graph build ({build_duration:.2?}): {}",
1362            path.display()
1363        );
1364    }
1365
1366    staging.attach_body_hashes(raw_content);
1367
1368    Ok(ParsedFileOutcome::Parsed(ParsedFile {
1369        language: builder.language(),
1370        staging,
1371    }))
1372}
1373
1374fn map_parse_error(path: &Path, err: ParseError) -> anyhow::Error {
1375    match err {
1376        ParseError::TreeSitterFailed => {
1377            anyhow::anyhow!("tree-sitter failed to parse {}", path.display())
1378        }
1379        ParseError::LanguageSetFailed(reason) => anyhow::anyhow!(
1380            "failed to configure tree-sitter for {}: {}",
1381            path.display(),
1382            reason
1383        ),
1384        ParseError::InputTooLarge { size, max, .. } => anyhow::anyhow!(
1385            "input too large for {}: {} bytes exceeds {} byte parser limit",
1386            path.display(),
1387            size,
1388            max
1389        ),
1390        ParseError::ParseTimedOut { timeout_micros, .. } => anyhow::anyhow!(
1391            "parse timed out for {} after {} ms",
1392            path.display(),
1393            timeout_micros / 1000
1394        ),
1395        ParseError::ParseCancelled { reason, .. } => {
1396            anyhow::anyhow!("parse cancelled for {}: {}", path.display(), reason)
1397        }
1398        _ => anyhow::anyhow!("parse error in {}: {:?}", path.display(), err),
1399    }
1400}
1401
1402fn map_builder_error(path: &Path, err: &GraphBuilderError) -> anyhow::Error {
1403    anyhow::anyhow!("graph builder error in {}: {}", path.display(), err)
1404}
1405
1406// ---------------------------------------------------------------------------
1407// Test-only hooks (Task 7 Phase 7c)
1408// ---------------------------------------------------------------------------
1409//
1410// Thread-local callbacks fired at pass boundaries inside
1411// `build_unified_graph_inner`. Tests that need to flip the
1412// `CancellationToken` between chunks / before Phase 4 / before Pass 5
1413// install a hook, trigger a rebuild, and observe the pipeline
1414// short-circuit.
1415//
1416// Follows the same pattern as [`incremental::testing`] (see
1417// `incremental.rs:1605`): the module is gated on
1418// `any(test, feature = "rebuild-internals")` and production builds
1419// compile every call site into `let _ = ...;` no-ops.
/// Test-only hooks that let `sqry-daemon` integration tests drive
/// cancellation-boundary scenarios in `build_unified_graph_inner` without
/// reaching into private module state.
///
/// Each hook lives in a thread-local slot, so a hook is only observed by a
/// build running on the thread that installed it.
///
/// Gated on `any(test, feature = "rebuild-internals")`; production
/// builds compile the module out.
#[cfg(any(test, feature = "rebuild-internals"))]
pub mod testing {
    use super::CancellationToken;
    use std::cell::RefCell;
    use std::thread::LocalKey;

    /// Callback invoked at the top of each chunk iteration in
    /// `build_unified_graph_inner`, receiving the current cancellation
    /// token. Tests typically call `token.cancel()` after N chunks to
    /// assert the pipeline short-circuits at the next boundary.
    pub type AfterChunkHook = Box<dyn FnMut(&CancellationToken)>;
    /// Callback invoked once after the chunk loop exits and before
    /// Phase 4 finalization.
    pub type BeforePhase4Hook = Box<dyn FnMut(&CancellationToken)>;
    /// Callback invoked once before Pass 5 cross-language linking.
    pub type BeforePass5Hook = Box<dyn FnMut(&CancellationToken)>;

    /// Common shape of the three public hook aliases.
    type Hook = Box<dyn FnMut(&CancellationToken)>;
    /// A thread-local slot holding at most one installed hook.
    type HookSlot = LocalKey<RefCell<Option<Hook>>>;

    thread_local! {
        static AFTER_CHUNK_HOOK: RefCell<Option<AfterChunkHook>> = const { RefCell::new(None) };
        static BEFORE_PHASE4_HOOK: RefCell<Option<BeforePhase4Hook>> = const { RefCell::new(None) };
        static BEFORE_PASS5_HOOK: RefCell<Option<BeforePass5Hook>> = const { RefCell::new(None) };
    }

    /// Put `replacement` into `slot` and hand back whatever was there before.
    fn swap_hook(slot: &'static HookSlot, replacement: Option<Hook>) -> Option<Hook> {
        slot.with(|cell| cell.replace(replacement))
    }

    /// Invoke the hook stored in `slot`, if any, with `cancellation`.
    ///
    /// The slot stays mutably borrowed while the hook runs.
    fn fire_hook(slot: &'static HookSlot, cancellation: &CancellationToken) {
        slot.with(|cell| {
            let mut installed = cell.borrow_mut();
            if let Some(hook) = installed.as_mut() {
                hook(cancellation);
            }
        });
    }

    /// Install a callback that runs at the top of each chunk iteration.
    /// Replaces any hook previously installed on the current thread and
    /// returns it.
    pub fn set_after_chunk_hook<F>(hook: F) -> Option<AfterChunkHook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        swap_hook(&AFTER_CHUNK_HOOK, Some(Box::new(hook)))
    }

    /// Remove the currently-installed after-chunk hook. Idempotent.
    pub fn clear_after_chunk_hook() {
        drop(swap_hook(&AFTER_CHUNK_HOOK, None));
    }

    /// Install a callback that runs after the chunk loop exits, before
    /// Phase 4 finalization. Replaces any previously-installed hook and
    /// returns it.
    pub fn set_before_phase4_hook<F>(hook: F) -> Option<BeforePhase4Hook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        swap_hook(&BEFORE_PHASE4_HOOK, Some(Box::new(hook)))
    }

    /// Remove the currently-installed before-Phase-4 hook. Idempotent.
    pub fn clear_before_phase4_hook() {
        drop(swap_hook(&BEFORE_PHASE4_HOOK, None));
    }

    /// Install a callback that runs before Pass 5 cross-language linking.
    /// Replaces any previously-installed hook and returns it.
    pub fn set_before_pass5_hook<F>(hook: F) -> Option<BeforePass5Hook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        swap_hook(&BEFORE_PASS5_HOOK, Some(Box::new(hook)))
    }

    /// Remove the currently-installed before-Pass-5 hook. Idempotent.
    pub fn clear_before_pass5_hook() {
        drop(swap_hook(&BEFORE_PASS5_HOOK, None));
    }

    /// Fire the installed after-chunk hook (if any). Called from
    /// `build_unified_graph_inner` at the top of every chunk iteration.
    pub(super) fn fire_after_chunk_hook(cancellation: &CancellationToken) {
        fire_hook(&AFTER_CHUNK_HOOK, cancellation);
    }

    /// Fire the installed before-Phase-4 hook (if any).
    pub(super) fn fire_before_phase4_hook(cancellation: &CancellationToken) {
        fire_hook(&BEFORE_PHASE4_HOOK, cancellation);
    }

    /// Fire the installed before-Pass-5 hook (if any).
    pub(super) fn fire_before_pass5_hook(cancellation: &CancellationToken) {
        fire_hook(&BEFORE_PASS5_HOOK, cancellation);
    }

    /// Guard that installs an after-chunk hook on construction and clears
    /// the slot on drop, so a panic mid-test cannot leak the hook into a
    /// sibling test on the same thread.
    ///
    /// Any hook that was installed before the guard is discarded, not
    /// restored.
    pub struct AfterChunkHookGuard {
        _sealed: (),
    }

    impl AfterChunkHookGuard {
        /// Install `hook` as the thread-local after-chunk callback.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            drop(set_after_chunk_hook(hook));
            Self { _sealed: () }
        }
    }

    impl Drop for AfterChunkHookGuard {
        fn drop(&mut self) {
            clear_after_chunk_hook();
        }
    }

    /// Guard that installs a before-Phase-4 hook on construction and clears
    /// the slot on drop. A previously installed hook is discarded.
    pub struct BeforePhase4HookGuard {
        _sealed: (),
    }

    impl BeforePhase4HookGuard {
        /// Install `hook` as the thread-local before-Phase-4 callback.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            drop(set_before_phase4_hook(hook));
            Self { _sealed: () }
        }
    }

    impl Drop for BeforePhase4HookGuard {
        fn drop(&mut self) {
            clear_before_phase4_hook();
        }
    }

    /// Guard that installs a before-Pass-5 hook on construction and clears
    /// the slot on drop. A previously installed hook is discarded.
    pub struct BeforePass5HookGuard {
        _sealed: (),
    }

    impl BeforePass5HookGuard {
        /// Install `hook` as the thread-local before-Pass-5 callback.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            drop(set_before_pass5_hook(hook));
            Self { _sealed: () }
        }
    }

    impl Drop for BeforePass5HookGuard {
        fn drop(&mut self) {
            clear_before_pass5_hook();
        }
    }
}
1594
1595#[cfg(test)]
1596mod tests {
1597    use super::*;
1598    use crate::ast::Scope;
1599    use crate::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language};
1600    use crate::plugin::error::{ParseError, ScopeError};
1601    use crate::plugin::{LanguageMetadata, LanguagePlugin};
1602    use std::fs;
1603    use std::path::Path;
1604    use tempfile::TempDir;
1605    use tree_sitter::{Parser, Tree};
1606
    /// Extension list handed to `TestPlugin` by the tests that write `.rs` fixtures.
    const RUST_TEST_EXTENSIONS: &[&str] = &["rs"];
    /// Extension list used by the filename-matching tests (`report.Rmd`, `bash_profile`).
    const FILENAME_MATCH_EXTENSIONS: &[&str] = &["rmd", "bash_profile"];
1609
1610    /// Test helper: commit a single parsed file to a graph using the serial path.
1611    ///
1612    /// This is only used in tests to verify parse-and-commit without running the
1613    /// full parallel pipeline. It replicates the old `commit_staged_file` logic.
1614    fn commit_parsed_file_for_test(path: &Path, mut parsed: ParsedFile, graph: &mut CodeGraph) {
1615        let file_id = graph
1616            .files_mut()
1617            .register_with_language(path, Some(parsed.language))
1618            .expect("register file");
1619        parsed.staging.apply_file_id(file_id);
1620        let string_remap = parsed
1621            .staging
1622            .commit_strings(graph.strings_mut())
1623            .expect("commit strings");
1624        parsed
1625            .staging
1626            .apply_string_remap(&string_remap)
1627            .expect("apply string remap");
1628        let node_id_mapping = parsed
1629            .staging
1630            .commit_nodes(graph.nodes_mut())
1631            .expect("commit nodes");
1632        let edges = parsed.staging.get_remapped_edges(&node_id_mapping);
1633        for edge in edges {
1634            graph.edges_mut().add_edge_with_spans(
1635                edge.source,
1636                edge.target,
1637                edge.kind.clone(),
1638                file_id,
1639                edge.spans.clone(),
1640            );
1641        }
1642    }
1643
1644    fn expect_parsed_file(outcome: ParsedFileOutcome) -> ParsedFile {
1645        match outcome {
1646            ParsedFileOutcome::Parsed(parsed) => parsed,
1647            ParsedFileOutcome::Skipped => panic!("expected parsed file, got skipped outcome"),
1648            ParsedFileOutcome::TimedOut { file, phase, .. } => {
1649                panic!(
1650                    "expected parsed file, got timeout outcome for {} during {}",
1651                    file.display(),
1652                    phase,
1653                )
1654            }
1655        }
1656    }
1657
1658    fn parse_rust_ast(content: &[u8]) -> Result<Tree, ParseError> {
1659        let mut parser = Parser::new();
1660        let language = tree_sitter_rust::LANGUAGE.into();
1661        parser
1662            .set_language(&language)
1663            .map_err(|err| ParseError::LanguageSetFailed(err.to_string()))?;
1664        parser
1665            .parse(content, None)
1666            .ok_or(ParseError::TreeSitterFailed)
1667    }
1668
1669    struct TestPlugin {
1670        metadata: LanguageMetadata,
1671        extensions: &'static [&'static str],
1672        builder: Option<Box<dyn GraphBuilder>>,
1673    }
1674
1675    impl TestPlugin {
1676        fn new(
1677            id: &'static str,
1678            extensions: &'static [&'static str],
1679            builder: Option<Box<dyn GraphBuilder>>,
1680        ) -> Self {
1681            Self {
1682                metadata: LanguageMetadata {
1683                    id,
1684                    name: "Rust",
1685                    version: "test",
1686                    author: "sqry-core tests",
1687                    description: "Test-only Rust plugin for unified graph entrypoint tests",
1688                    tree_sitter_version: "0.25",
1689                },
1690                extensions,
1691                builder,
1692            }
1693        }
1694    }
1695
1696    impl LanguagePlugin for TestPlugin {
1697        fn metadata(&self) -> LanguageMetadata {
1698            self.metadata.clone()
1699        }
1700
1701        fn extensions(&self) -> &'static [&'static str] {
1702            self.extensions
1703        }
1704
1705        fn language(&self) -> tree_sitter::Language {
1706            tree_sitter_rust::LANGUAGE.into()
1707        }
1708
1709        fn parse_ast(&self, content: &[u8]) -> Result<Tree, ParseError> {
1710            parse_rust_ast(content)
1711        }
1712
1713        fn extract_scopes(
1714            &self,
1715            _tree: &Tree,
1716            _content: &[u8],
1717            _file_path: &Path,
1718        ) -> Result<Vec<Scope>, ScopeError> {
1719            Ok(Vec::new())
1720        }
1721
1722        fn graph_builder(&self) -> Option<&dyn crate::graph::GraphBuilder> {
1723            self.builder.as_deref()
1724        }
1725    }
1726
1727    struct FailingGraphBuilder;
1728
1729    impl GraphBuilder for FailingGraphBuilder {
1730        fn build_graph(
1731            &self,
1732            _tree: &Tree,
1733            _content: &[u8],
1734            _file: &Path,
1735            _staging: &mut StagingGraph,
1736        ) -> GraphResult<()> {
1737            Err(GraphBuilderError::CrossLanguageError {
1738                reason: "forced failure".to_string(),
1739            })
1740        }
1741
1742        fn language(&self) -> Language {
1743            Language::Rust
1744        }
1745    }
1746
1747    struct NoopGraphBuilder;
1748
1749    impl GraphBuilder for NoopGraphBuilder {
1750        fn build_graph(
1751            &self,
1752            _tree: &Tree,
1753            _content: &[u8],
1754            _file: &Path,
1755            _staging: &mut StagingGraph,
1756        ) -> GraphResult<()> {
1757            Ok(())
1758        }
1759
1760        fn language(&self) -> Language {
1761            Language::Rust
1762        }
1763    }
1764
1765    struct TimeoutGraphBuilder;
1766
1767    impl GraphBuilder for TimeoutGraphBuilder {
1768        fn build_graph(
1769            &self,
1770            _tree: &Tree,
1771            _content: &[u8],
1772            file: &Path,
1773            _staging: &mut StagingGraph,
1774        ) -> GraphResult<()> {
1775            Err(GraphBuilderError::BuildTimedOut {
1776                file: file.to_path_buf(),
1777                phase: "test-timeout",
1778                timeout_ms: 42,
1779            })
1780        }
1781
1782        fn language(&self) -> Language {
1783            Language::Rust
1784        }
1785    }
1786
1787    struct SelectiveTimeoutGraphBuilder;
1788
1789    impl GraphBuilder for SelectiveTimeoutGraphBuilder {
1790        fn build_graph(
1791            &self,
1792            _tree: &Tree,
1793            _content: &[u8],
1794            file: &Path,
1795            staging: &mut StagingGraph,
1796        ) -> GraphResult<()> {
1797            use crate::graph::unified::build::helper::GraphBuildHelper;
1798
1799            let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
1800            let file_name = file
1801                .file_name()
1802                .and_then(|value| value.to_str())
1803                .unwrap_or_default();
1804
1805            if file_name == "timeout.rs" {
1806                helper.add_function("timeout_partial", None, false, false);
1807                return Err(GraphBuilderError::BuildTimedOut {
1808                    file: file.to_path_buf(),
1809                    phase: "test-timeout",
1810                    timeout_ms: 42,
1811                });
1812            }
1813
1814            helper.add_function("survivor_fn", None, false, false);
1815            Ok(())
1816        }
1817
1818        fn language(&self) -> Language {
1819            Language::Rust
1820        }
1821    }
1822
1823    #[test]
1824    fn test_build_config_default() {
1825        let config = BuildConfig::default();
1826        assert_eq!(config.max_depth, None);
1827        assert!(!config.follow_links);
1828        assert!(!config.include_hidden);
1829        assert_eq!(config.num_threads, None);
1830    }
1831
1832    #[test]
1833    fn test_build_unified_graph_empty_registry_error() {
1834        let plugins = PluginManager::new();
1835        let config = BuildConfig::default();
1836        let root = std::path::Path::new(".");
1837
1838        let result = build_unified_graph(root, &plugins, &config);
1839        let err = result.expect_err("empty registry must error");
1840        // Task 7 Phase 7c: the internal pipeline now returns
1841        // `GraphBuilderError::Internal { reason }` instead of a bare
1842        // `anyhow::bail!`. The legacy `build_unified_graph` wrapper
1843        // lifts through `anyhow::Error::from`, which prefixes the
1844        // reason with the `GraphBuilderError::Internal` `Display`
1845        // string (`Internal graph builder error: ...`).
1846        assert_eq!(
1847            err.to_string(),
1848            "Internal graph builder error: No graph builders registered – cannot build code graph"
1849        );
1850    }
1851
1852    #[test]
1853    fn test_build_unified_graph_no_graph_builders_error() {
1854        let mut plugins = PluginManager::new();
1855        plugins.register_builtin(Box::new(TestPlugin::new(
1856            "rust-no-graph-builder",
1857            RUST_TEST_EXTENSIONS,
1858            None,
1859        )));
1860        let config = BuildConfig::default();
1861        let root = std::path::Path::new(".");
1862
1863        let result = build_unified_graph(root, &plugins, &config);
1864        let err = result.expect_err("no graph builders must error");
1865        assert_eq!(
1866            err.to_string(),
1867            "Internal graph builder error: No graph builders registered – cannot build code graph"
1868        );
1869    }
1870
1871    #[test]
1872    fn test_build_unified_graph_all_failures_error() {
1873        let temp_dir = TempDir::new().expect("temp dir");
1874        let file_path = temp_dir.path().join("fail.rs");
1875        fs::write(&file_path, "fn main() {}").expect("write test file");
1876
1877        let mut plugins = PluginManager::new();
1878        plugins.register_builtin(Box::new(TestPlugin::new(
1879            "rust-failing-graph-builder",
1880            RUST_TEST_EXTENSIONS,
1881            Some(Box::new(FailingGraphBuilder)),
1882        )));
1883        let config = BuildConfig::default();
1884
1885        let result = build_unified_graph(temp_dir.path(), &plugins, &config);
1886        let err = result.expect_err("all-failures must error");
1887        assert_eq!(
1888            err.to_string(),
1889            "Internal graph builder error: All graph builds failed"
1890        );
1891    }
1892
1893    #[test]
1894    fn test_parse_file_matches_uppercase_extension() {
1895        let temp_dir = TempDir::new().expect("temp dir");
1896        let file_path = temp_dir.path().join("report.Rmd");
1897        fs::write(&file_path, "fn main() {}").expect("write test file");
1898
1899        let mut plugins = PluginManager::new();
1900        plugins.register_builtin(Box::new(TestPlugin::new(
1901            "rust-filename-match",
1902            FILENAME_MATCH_EXTENSIONS,
1903            Some(Box::new(NoopGraphBuilder)),
1904        )));
1905        let mut graph = CodeGraph::new();
1906
1907        let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
1908        commit_parsed_file_for_test(&file_path, parsed, &mut graph);
1909    }
1910
1911    #[test]
1912    fn test_parse_file_matches_dotless_filename() {
1913        let temp_dir = TempDir::new().expect("temp dir");
1914        let file_path = temp_dir.path().join("bash_profile");
1915        fs::write(&file_path, "fn main() {}").expect("write test file");
1916
1917        let mut plugins = PluginManager::new();
1918        plugins.register_builtin(Box::new(TestPlugin::new(
1919            "rust-filename-match",
1920            FILENAME_MATCH_EXTENSIONS,
1921            Some(Box::new(NoopGraphBuilder)),
1922        )));
1923        let mut graph = CodeGraph::new();
1924
1925        let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
1926        commit_parsed_file_for_test(&file_path, parsed, &mut graph);
1927    }
1928
1929    #[test]
1930    fn test_parse_file_matches_pulumi_stack_filename() {
1931        let temp_dir = TempDir::new().expect("temp dir");
1932        let file_path = temp_dir.path().join("Pulumi.dev.yaml");
1933        fs::write(&file_path, "fn main() {}").expect("write test file");
1934
1935        let mut plugins = PluginManager::new();
1936        plugins.register_builtin(Box::new(TestPlugin::new(
1937            "pulumi",
1938            &["pulumi.yaml"],
1939            Some(Box::new(NoopGraphBuilder)),
1940        )));
1941        let mut graph = CodeGraph::new();
1942
1943        let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
1944        commit_parsed_file_for_test(&file_path, parsed, &mut graph);
1945    }
1946
1947    #[test]
1948    fn test_parse_file_returns_timed_out_outcome() {
1949        let temp_dir = TempDir::new().expect("temp dir");
1950        let file_path = temp_dir.path().join("timeout.rs");
1951        fs::write(&file_path, "fn main() {}").expect("write test file");
1952
1953        let mut plugins = PluginManager::new();
1954        plugins.register_builtin(Box::new(TestPlugin::new(
1955            "rust-timeout",
1956            RUST_TEST_EXTENSIONS,
1957            Some(Box::new(TimeoutGraphBuilder)),
1958        )));
1959
1960        let outcome = parse_file(&file_path, &plugins).expect("parse file");
1961        match outcome {
1962            ParsedFileOutcome::TimedOut {
1963                file,
1964                phase,
1965                timeout_ms,
1966            } => {
1967                assert_eq!(file, file_path);
1968                assert_eq!(phase, "test-timeout");
1969                assert_eq!(timeout_ms, 42);
1970            }
1971            other => panic!("expected timed out outcome, got {other:?}"),
1972        }
1973    }
1974
1975    #[test]
1976    fn test_parse_file_rejects_oversized_input() {
1977        let temp_dir = TempDir::new().expect("temp dir");
1978        let file_path = temp_dir.path().join("oversized.rs");
1979        fs::write(&file_path, vec![b'a'; 1_048_577]).expect("write oversized file");
1980
1981        let mut plugins = PluginManager::new();
1982        plugins.register_builtin(Box::new(TestPlugin::new(
1983            "rust-oversized",
1984            RUST_TEST_EXTENSIONS,
1985            Some(Box::new(NoopGraphBuilder)),
1986        )));
1987
1988        unsafe {
1989            std::env::set_var("SQRY_MAX_SOURCE_FILE_SIZE", "1048576");
1990        }
1991        let err = parse_file(&file_path, &plugins).expect_err("oversized file should fail");
1992        unsafe {
1993            std::env::remove_var("SQRY_MAX_SOURCE_FILE_SIZE");
1994        }
1995
1996        let err_text = err.to_string();
1997        assert!(err_text.contains("oversized.rs"));
1998    }
1999
2000    #[test]
2001    fn test_build_unified_graph_skips_timed_out_file_without_partial_commit() {
2002        let temp_dir = TempDir::new().expect("temp dir");
2003        let ok_path = temp_dir.path().join("ok.rs");
2004        let timeout_path = temp_dir.path().join("timeout.rs");
2005        fs::write(&ok_path, "fn ok() {}").expect("write ok file");
2006        fs::write(&timeout_path, "fn timeout() {}").expect("write timeout file");
2007
2008        let mut plugins = PluginManager::new();
2009        plugins.register_builtin(Box::new(TestPlugin::new(
2010            "rust-selective-timeout",
2011            RUST_TEST_EXTENSIONS,
2012            Some(Box::new(SelectiveTimeoutGraphBuilder)),
2013        )));
2014        let config = BuildConfig::default();
2015
2016        let graph = build_unified_graph(temp_dir.path(), &plugins, &config)
2017            .expect("graph build should succeed with surviving files");
2018        let snapshot = graph.snapshot();
2019
2020        assert_eq!(snapshot.find_by_pattern("survivor_fn").len(), 1);
2021        assert!(
2022            snapshot.find_by_pattern("timeout_partial").is_empty(),
2023            "timed out file staging must not be committed"
2024        );
2025    }
2026
2027    // ========================================================================
2028    // Build pipeline consolidation regression tests
2029    // ========================================================================
2030
2031    /// A graph builder that creates a few nodes and edges for testing.
2032    struct SimpleGraphBuilder;
2033
2034    impl GraphBuilder for SimpleGraphBuilder {
2035        fn build_graph(
2036            &self,
2037            _tree: &Tree,
2038            _content: &[u8],
2039            file: &Path,
2040            staging: &mut StagingGraph,
2041        ) -> GraphResult<()> {
2042            use crate::graph::unified::build::helper::GraphBuildHelper;
2043
2044            let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
2045
2046            // Create two function nodes
2047            let fn1 = helper.add_function("main", None, false, false);
2048            let fn2 = helper.add_function("helper", None, false, false);
2049
2050            // Add a Calls edge from main -> helper
2051            helper.add_call_edge(fn1, fn2);
2052
2053            Ok(())
2054        }
2055
2056        fn language(&self) -> Language {
2057            Language::Rust
2058        }
2059    }
2060
2061    /// `build_and_persist_graph` returns a populated `BuildResult`.
2062    #[test]
2063    fn test_build_and_persist_graph_returns_build_result() {
2064        let temp_dir = TempDir::new().expect("temp dir");
2065        let file_path = temp_dir.path().join("test.rs");
2066        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2067
2068        let mut plugins = PluginManager::new();
2069        plugins.register_builtin(Box::new(TestPlugin::new(
2070            "rust-simple",
2071            RUST_TEST_EXTENSIONS,
2072            Some(Box::new(SimpleGraphBuilder)),
2073        )));
2074        let config = BuildConfig::default();
2075
2076        let result =
2077            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:build_result");
2078        assert!(result.is_ok(), "build_and_persist_graph should succeed");
2079
2080        let (_graph, build_result) = result.unwrap();
2081        assert!(build_result.node_count > 0, "Should have nodes");
2082        assert!(build_result.total_files > 0, "Should have indexed files");
2083        assert!(!build_result.built_at.is_empty(), "Should have timestamp");
2084        assert!(!build_result.root_path.is_empty(), "Should have root path");
2085    }
2086
2087    /// Deduplicated `edge_count` is always <= `raw_edge_count`.
2088    #[test]
2089    fn test_build_result_edge_count_le_raw() {
2090        let temp_dir = TempDir::new().expect("temp dir");
2091        let file_path = temp_dir.path().join("test.rs");
2092        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2093
2094        let mut plugins = PluginManager::new();
2095        plugins.register_builtin(Box::new(TestPlugin::new(
2096            "rust-simple",
2097            RUST_TEST_EXTENSIONS,
2098            Some(Box::new(SimpleGraphBuilder)),
2099        )));
2100        let config = BuildConfig::default();
2101
2102        let (_graph, build_result) =
2103            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:edge_count").unwrap();
2104
2105        assert!(
2106            build_result.edge_count <= build_result.raw_edge_count,
2107            "Deduplicated edge count ({}) should be <= raw edge count ({})",
2108            build_result.edge_count,
2109            build_result.raw_edge_count
2110        );
2111    }
2112
2113    /// File counts use plugin detection (keyed by plugin ID).
2114    #[test]
2115    fn test_build_and_persist_graph_file_counts_use_plugins() {
2116        let temp_dir = TempDir::new().expect("temp dir");
2117        let file_path = temp_dir.path().join("test.rs");
2118        fs::write(&file_path, "fn main() {}").expect("write test file");
2119
2120        let mut plugins = PluginManager::new();
2121        plugins.register_builtin(Box::new(TestPlugin::new(
2122            "rust-simple",
2123            RUST_TEST_EXTENSIONS,
2124            Some(Box::new(SimpleGraphBuilder)),
2125        )));
2126        let config = BuildConfig::default();
2127
2128        let (_graph, build_result) =
2129            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:file_counts")
2130                .unwrap();
2131
2132        // File counts should include the plugin's ID as the language key
2133        assert!(
2134            !build_result.file_count.is_empty(),
2135            "File counts should not be empty"
2136        );
2137        assert!(
2138            build_result.file_count.contains_key("rust-simple"),
2139            "File counts should use plugin ID. Got: {:?}",
2140            build_result.file_count
2141        );
2142    }
2143
2144    /// Manifest `edge_count` matches `BuildResult` (deduplicated).
2145    #[test]
2146    fn test_manifest_edge_count_is_deduplicated() {
2147        use crate::graph::unified::persistence::GraphStorage;
2148
2149        let temp_dir = TempDir::new().expect("temp dir");
2150        let file_path = temp_dir.path().join("test.rs");
2151        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2152
2153        let mut plugins = PluginManager::new();
2154        plugins.register_builtin(Box::new(TestPlugin::new(
2155            "rust-simple",
2156            RUST_TEST_EXTENSIONS,
2157            Some(Box::new(SimpleGraphBuilder)),
2158        )));
2159        let config = BuildConfig::default();
2160
2161        let (_graph, build_result) =
2162            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:manifest_dedup")
2163                .unwrap();
2164
2165        // Load manifest and verify edge counts match BuildResult
2166        let storage = GraphStorage::new(temp_dir.path());
2167        assert!(storage.exists(), "Manifest should exist after build");
2168
2169        let manifest = storage.load_manifest().unwrap();
2170        assert_eq!(
2171            manifest.edge_count, build_result.edge_count,
2172            "Manifest edge_count should match BuildResult (deduplicated)"
2173        );
2174        assert_eq!(
2175            manifest.raw_edge_count,
2176            Some(build_result.raw_edge_count),
2177            "Manifest raw_edge_count should match BuildResult"
2178        );
2179    }
2180
2181    /// Build command provenance is recorded in the manifest.
2182    #[test]
2183    fn test_build_command_provenance() {
2184        use crate::graph::unified::persistence::GraphStorage;
2185
2186        let temp_dir = TempDir::new().expect("temp dir");
2187        let file_path = temp_dir.path().join("test.rs");
2188        fs::write(&file_path, "fn main() {}").expect("write test file");
2189
2190        let mut plugins = PluginManager::new();
2191        plugins.register_builtin(Box::new(TestPlugin::new(
2192            "rust-simple",
2193            RUST_TEST_EXTENSIONS,
2194            Some(Box::new(SimpleGraphBuilder)),
2195        )));
2196        let config = BuildConfig::default();
2197
2198        build_and_persist_graph(temp_dir.path(), &plugins, &config, "cli:index").unwrap();
2199
2200        let storage = GraphStorage::new(temp_dir.path());
2201        let manifest = storage.load_manifest().unwrap();
2202        assert_eq!(
2203            manifest.build_provenance.build_command, "cli:index",
2204            "Build command provenance should match"
2205        );
2206    }
2207
2208    /// Wrapper-based builds infer plugin-selection provenance from the active
2209    /// plugin manager so non-CLI callers do not silently persist legacy-looking
2210    /// manifests.
2211    #[test]
2212    fn test_wrapper_infers_plugin_selection_from_manager() {
2213        use crate::graph::unified::persistence::GraphStorage;
2214
2215        let temp_dir = TempDir::new().expect("temp dir");
2216        let file_path = temp_dir.path().join("test.rs");
2217        fs::write(&file_path, "fn main() {}").expect("write test file");
2218
2219        let mut plugins = PluginManager::new();
2220        plugins.register_builtin(Box::new(TestPlugin::new(
2221            "rust-simple",
2222            RUST_TEST_EXTENSIONS,
2223            Some(Box::new(SimpleGraphBuilder)),
2224        )));
2225        let config = BuildConfig::default();
2226
2227        let (_graph, build_result) =
2228            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:wrapper_plugins")
2229                .expect("wrapper build should succeed");
2230
2231        assert_eq!(
2232            build_result.active_plugin_ids,
2233            vec!["rust-simple".to_string()],
2234            "build result should expose the inferred active plugin ids"
2235        );
2236
2237        let storage = GraphStorage::new(temp_dir.path());
2238        let manifest = storage.load_manifest().expect("manifest should load");
2239        let plugin_selection = manifest
2240            .plugin_selection
2241            .expect("wrapper should persist plugin selection metadata");
2242        assert_eq!(
2243            plugin_selection.active_plugin_ids,
2244            vec!["rust-simple".to_string()],
2245            "wrapper should persist the manager-derived plugin ids"
2246        );
2247        assert_eq!(
2248            plugin_selection.high_cost_mode, None,
2249            "wrapper-inferred plugin selection should keep high_cost_mode diagnostic-only"
2250        );
2251    }
2252
2253    /// Analysis identity hash matches the on-disk manifest bytes hash.
2254    #[test]
2255    fn test_analysis_identity_matches_manifest_hash() {
2256        use crate::graph::unified::analysis::persistence::load_csr;
2257        use crate::graph::unified::persistence::GraphStorage;
2258        use sha2::{Digest, Sha256};
2259
2260        let temp_dir = TempDir::new().expect("temp dir");
2261        let file_path = temp_dir.path().join("test.rs");
2262        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2263
2264        let mut plugins = PluginManager::new();
2265        plugins.register_builtin(Box::new(TestPlugin::new(
2266            "rust-simple",
2267            RUST_TEST_EXTENSIONS,
2268            Some(Box::new(SimpleGraphBuilder)),
2269        )));
2270        let config = BuildConfig::default();
2271
2272        build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:identity").unwrap();
2273
2274        let storage = GraphStorage::new(temp_dir.path());
2275
2276        // Compute manifest hash from on-disk manifest bytes
2277        let manifest_bytes = std::fs::read(storage.manifest_path()).unwrap();
2278        let expected_hash = hex::encode(Sha256::digest(&manifest_bytes));
2279
2280        // Load analysis identity from the CSR file (identity is embedded in each analysis file)
2281        let (_csr, identity) = load_csr(&storage.analysis_csr_path()).unwrap();
2282
2283        assert_eq!(
2284            identity.manifest_hash, expected_hash,
2285            "On-disk manifest hash should equal analysis identity hash"
2286        );
2287    }
2288
2289    /// Regression test: old manifest is removed at start of rebuild.
2290    ///
2291    /// Verifies that `build_and_persist_graph_with_progress()` removes any
2292    /// existing manifest before writing the new snapshot. This prevents the
2293    /// inconsistent state where an old manifest pairs with a new snapshot
2294    /// after an interrupted rebuild.
2295    #[test]
2296    fn test_old_manifest_removed_during_rebuild() {
2297        use crate::graph::unified::persistence::GraphStorage;
2298
2299        let temp_dir = tempfile::TempDir::new().unwrap();
2300        let src = temp_dir.path().join("lib.rs");
2301        std::fs::write(&src, "fn main() {}").unwrap();
2302
2303        // Build an initial index
2304        let mut plugins = PluginManager::new();
2305        plugins.register_builtin(Box::new(TestPlugin::new(
2306            "rust-simple",
2307            RUST_TEST_EXTENSIONS,
2308            Some(Box::new(SimpleGraphBuilder)),
2309        )));
2310        let config = BuildConfig::default();
2311        build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:initial").unwrap();
2312
2313        let storage = GraphStorage::new(temp_dir.path());
2314        assert!(
2315            storage.exists(),
2316            "Manifest should exist after initial build"
2317        );
2318
2319        // Record the original manifest's built_at timestamp
2320        let original_manifest = storage.load_manifest().unwrap();
2321        let original_built_at = original_manifest.built_at.clone();
2322
2323        // Rebuild — during the build, the old manifest should be removed first
2324        build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:rebuild").unwrap();
2325
2326        // Verify the manifest was replaced (different built_at timestamp)
2327        let new_manifest = storage.load_manifest().unwrap();
2328        assert_ne!(
2329            original_built_at, new_manifest.built_at,
2330            "Manifest should have been replaced with new timestamp"
2331        );
2332        assert_eq!(
2333            new_manifest.build_provenance.build_command, "test:rebuild",
2334            "Manifest should reflect the rebuild provenance"
2335        );
2336    }
2337
2338    /// Regression test: failed rebuild leaves index in non-ready state.
2339    ///
2340    /// Exercises the real pipeline by making the analysis directory
2341    /// non-writable after an initial build, then attempting a rebuild.
2342    /// The pipeline should:
2343    ///   1. Remove the old manifest (Step 2) — making `exists()` false.
2344    ///   2. Write the new snapshot (Step 3).
2345    ///   3. Fail at analysis persistence (Step 9) because the directory
2346    ///      is not writable.
2347    ///   4. Return an error — manifest is NEVER written.
2348    ///
2349    /// After the failed rebuild, `storage.exists()` must be false (old
2350    /// manifest removed), even though the snapshot file was updated.
2351    #[test]
2352    fn test_failed_rebuild_leaves_index_not_ready() {
2353        use crate::graph::unified::persistence::GraphStorage;
2354
2355        let temp_dir = tempfile::TempDir::new().unwrap();
2356        let src = temp_dir.path().join("lib.rs");
2357        std::fs::write(&src, "fn main() {}").unwrap();
2358
2359        // Build an initial index (success)
2360        let mut plugins = PluginManager::new();
2361        plugins.register_builtin(Box::new(TestPlugin::new(
2362            "rust-simple",
2363            RUST_TEST_EXTENSIONS,
2364            Some(Box::new(SimpleGraphBuilder)),
2365        )));
2366        let config = BuildConfig::default();
2367        build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:initial").unwrap();
2368
2369        let storage = GraphStorage::new(temp_dir.path());
2370        assert!(
2371            storage.exists(),
2372            "Manifest should exist after initial build"
2373        );
2374
2375        // Replace the analysis directory with a regular file to force a
2376        // failure at Step 9 (analysis persistence). `create_dir_all` will
2377        // fail because a regular file exists where a directory is expected.
2378        // This simulates the real failure window between snapshot write
2379        // (Step 3) and manifest write (Step 10).
2380        let analysis_dir = storage.analysis_dir().to_path_buf();
2381        std::fs::remove_dir_all(&analysis_dir).unwrap();
2382        std::fs::write(&analysis_dir, b"blocker").unwrap();
2383
2384        // Attempt rebuild — should fail at analysis persistence
2385        let result =
2386            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:failed_rebuild");
2387
2388        // Restore analysis dir so TempDir cleanup succeeds
2389        std::fs::remove_file(&analysis_dir).unwrap();
2390        std::fs::create_dir_all(&analysis_dir).unwrap();
2391
2392        // The build should have failed
2393        assert!(
2394            result.is_err(),
2395            "Rebuild should fail when analysis dir is read-only"
2396        );
2397
2398        // The old manifest should have been removed (Step 2 ran before failure)
2399        assert!(
2400            !storage.exists(),
2401            "After failed rebuild, manifest should have been removed — index is NOT ready"
2402        );
2403
2404        // The snapshot was updated (Step 3 succeeded before failure)
2405        assert!(
2406            storage.snapshot_exists(),
2407            "Snapshot should still exist on disk (written before failure)"
2408        );
2409    }
2410
2411    // ===== CSR Compaction Persistence Regression Tests =====
2412
2413    /// Graph builder that creates duplicate edges to exercise `raw_edge_count` > `edge_count`.
2414    struct DuplicateCallsGraphBuilder;
2415
2416    impl GraphBuilder for DuplicateCallsGraphBuilder {
2417        fn build_graph(
2418            &self,
2419            _tree: &Tree,
2420            _content: &[u8],
2421            file: &Path,
2422            staging: &mut StagingGraph,
2423        ) -> GraphResult<()> {
2424            use crate::graph::unified::build::helper::GraphBuildHelper;
2425
2426            let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
2427            let fn1 = helper.add_function("main", None, false, false);
2428            let fn2 = helper.add_function("helper", None, false, false);
2429
2430            // Add the same Calls edge twice to create a duplicate
2431            helper.add_call_edge(fn1, fn2);
2432            helper.add_call_edge(fn1, fn2);
2433
2434            Ok(())
2435        }
2436
2437        fn language(&self) -> Language {
2438            Language::Rust
2439        }
2440    }
2441
2442    /// Persisted snapshot has CSR on both stores and empty deltas.
2443    #[test]
2444    fn test_persisted_snapshot_compacts_both_edge_stores_before_save() {
2445        use crate::graph::unified::persistence::{GraphStorage, load_from_path};
2446
2447        let temp_dir = TempDir::new().expect("temp dir");
2448        let file_path = temp_dir.path().join("test.rs");
2449        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2450
2451        let mut plugins = PluginManager::new();
2452        plugins.register_builtin(Box::new(TestPlugin::new(
2453            "rust-simple",
2454            RUST_TEST_EXTENSIONS,
2455            Some(Box::new(SimpleGraphBuilder)),
2456        )));
2457        let config = BuildConfig::default();
2458
2459        let _result =
2460            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:csr_compact")
2461                .expect("build should succeed");
2462
2463        // Load the persisted snapshot and verify CSR state
2464        let storage = GraphStorage::new(temp_dir.path());
2465        let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");
2466
2467        assert!(
2468            loaded.edges().forward().csr().is_some(),
2469            "Forward store must have CSR after persistence"
2470        );
2471        assert!(
2472            loaded.edges().reverse().csr().is_some(),
2473            "Reverse store must have CSR after persistence"
2474        );
2475
2476        let stats = loaded.edges().stats();
2477        assert_eq!(
2478            stats.forward.delta_edge_count, 0,
2479            "Forward delta must be empty after persistence"
2480        );
2481        assert_eq!(
2482            stats.reverse.delta_edge_count, 0,
2483            "Reverse delta must be empty after persistence"
2484        );
2485    }
2486
2487    /// Loaded snapshot supports reverse traversal (direct-callers / `edges_to`).
2488    #[test]
2489    fn test_loaded_snapshot_edges_to_works_after_round_trip() {
2490        use crate::graph::unified::edge::EdgeKind;
2491        use crate::graph::unified::persistence::{GraphStorage, load_from_path};
2492        use crate::graph::unified::{
2493            FileScope, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2494        };
2495
2496        let temp_dir = TempDir::new().expect("temp dir");
2497        let file_path = temp_dir.path().join("test.rs");
2498        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2499
2500        let mut plugins = PluginManager::new();
2501        plugins.register_builtin(Box::new(TestPlugin::new(
2502            "rust-simple",
2503            RUST_TEST_EXTENSIONS,
2504            Some(Box::new(SimpleGraphBuilder)),
2505        )));
2506        let config = BuildConfig::default();
2507
2508        build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:round_trip")
2509            .expect("build should succeed");
2510
2511        let storage = GraphStorage::new(temp_dir.path());
2512        let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");
2513
2514        // Find main and helper node IDs through symbol resolution
2515        let snapshot = loaded.snapshot();
2516
2517        let main_id = match snapshot.find_symbol_candidates(&SymbolQuery {
2518            symbol: "main",
2519            file_scope: FileScope::Any,
2520            mode: ResolutionMode::AllowSuffixCandidates,
2521        }) {
2522            SymbolCandidateOutcome::Candidates(ids) => ids[0],
2523            _ => panic!("main node must exist"),
2524        };
2525
2526        let helper_id = match snapshot.find_symbol_candidates(&SymbolQuery {
2527            symbol: "helper",
2528            file_scope: FileScope::Any,
2529            mode: ResolutionMode::AllowSuffixCandidates,
2530        }) {
2531            SymbolCandidateOutcome::Candidates(ids) => ids[0],
2532            _ => panic!("helper node must exist"),
2533        };
2534
2535        // Forward: main -> helper
2536        let forward_edges = loaded.edges().edges_from(main_id);
2537        let has_call = forward_edges
2538            .iter()
2539            .any(|e| e.target == helper_id && matches!(e.kind, EdgeKind::Calls { .. }));
2540        assert!(has_call, "Forward traversal: main should call helper");
2541
2542        // Reverse: helper <- main (the critical regression check)
2543        let reverse_edges = loaded.edges().edges_to(helper_id);
2544        let has_caller = reverse_edges
2545            .iter()
2546            .any(|e| e.source == main_id && matches!(e.kind, EdgeKind::Calls { .. }));
2547        assert!(
2548            has_caller,
2549            "Reverse traversal: helper should have main as caller"
2550        );
2551    }
2552
2553    /// `raw_edge_count` >= `edge_count` still holds after pre-save compaction.
2554    #[test]
2555    fn test_raw_edge_count_preserved_across_pre_save_compaction() {
2556        use crate::graph::unified::persistence::GraphStorage;
2557
2558        let temp_dir = TempDir::new().expect("temp dir");
2559        let file_path = temp_dir.path().join("test.rs");
2560        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2561
2562        let mut plugins = PluginManager::new();
2563        plugins.register_builtin(Box::new(TestPlugin::new(
2564            "rust-dup",
2565            RUST_TEST_EXTENSIONS,
2566            Some(Box::new(DuplicateCallsGraphBuilder)),
2567        )));
2568        let config = BuildConfig::default();
2569
2570        let (_graph, build_result) =
2571            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:raw_edge_count")
2572                .expect("build should succeed");
2573
2574        assert!(
2575            build_result.raw_edge_count > build_result.edge_count,
2576            "raw_edge_count ({}) must be > edge_count ({}) for duplicate builder",
2577            build_result.raw_edge_count,
2578            build_result.edge_count
2579        );
2580
2581        // Verify manifest matches
2582        let storage = GraphStorage::new(temp_dir.path());
2583        let manifest = storage.load_manifest().expect("manifest should load");
2584
2585        assert_eq!(
2586            manifest.raw_edge_count,
2587            Some(build_result.raw_edge_count),
2588            "Manifest raw_edge_count must match build result"
2589        );
2590        assert_eq!(
2591            manifest.edge_count, build_result.edge_count,
2592            "Manifest edge_count must match build result"
2593        );
2594    }
2595
2596    /// Full round-trip: build -> save -> load -> query produces correct results.
2597    #[test]
2598    fn test_build_save_load_query_round_trip_preserves_edge_queries() {
2599        use crate::graph::unified::edge::EdgeKind;
2600        use crate::graph::unified::persistence::{GraphStorage, load_from_path};
2601        use crate::graph::unified::{
2602            FileScope, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
2603        };
2604
2605        let temp_dir = TempDir::new().expect("temp dir");
2606        let file_path = temp_dir.path().join("test.rs");
2607        fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2608
2609        let mut plugins = PluginManager::new();
2610        plugins.register_builtin(Box::new(TestPlugin::new(
2611            "rust-simple",
2612            RUST_TEST_EXTENSIONS,
2613            Some(Box::new(SimpleGraphBuilder)),
2614        )));
2615        let config = BuildConfig::default();
2616
2617        let (_original_graph, build_result) =
2618            build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:full_round_trip")
2619                .expect("build should succeed");
2620
2621        // Load from disk
2622        let storage = GraphStorage::new(temp_dir.path());
2623        let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");
2624
2625        // Edge count on loaded graph should match dedup count
2626        assert_eq!(
2627            loaded.edge_count(),
2628            build_result.edge_count,
2629            "Loaded graph edge count must match build result dedup count"
2630        );
2631
2632        // Node count should match
2633        assert_eq!(
2634            loaded.node_count(),
2635            build_result.node_count,
2636            "Loaded graph node count must match build result"
2637        );
2638
2639        // Verify edge queries work on loaded graph
2640        let snapshot = loaded.snapshot();
2641
2642        let main_id = match snapshot.find_symbol_candidates(&SymbolQuery {
2643            symbol: "main",
2644            file_scope: FileScope::Any,
2645            mode: ResolutionMode::AllowSuffixCandidates,
2646        }) {
2647            SymbolCandidateOutcome::Candidates(ids) => {
2648                assert!(!ids.is_empty(), "main must exist");
2649                ids[0]
2650            }
2651            _ => panic!("main node must exist"),
2652        };
2653
2654        let helper_id = match snapshot.find_symbol_candidates(&SymbolQuery {
2655            symbol: "helper",
2656            file_scope: FileScope::Any,
2657            mode: ResolutionMode::AllowSuffixCandidates,
2658        }) {
2659            SymbolCandidateOutcome::Candidates(ids) => {
2660                assert!(!ids.is_empty(), "helper must exist");
2661                ids[0]
2662            }
2663            _ => panic!("helper node must exist"),
2664        };
2665
2666        // Forward query: main calls helper
2667        let fwd = loaded.edges().edges_from(main_id);
2668        let has_fwd_call = fwd
2669            .iter()
2670            .any(|e| e.target == helper_id && matches!(e.kind, EdgeKind::Calls { .. }));
2671        assert!(has_fwd_call, "edges_from(main) must include call to helper");
2672
2673        // Reverse query: helper called by main
2674        let rev = loaded.edges().edges_to(helper_id);
2675        let has_rev_call = rev
2676            .iter()
2677            .any(|e| e.source == main_id && matches!(e.kind, EdgeKind::Calls { .. }));
2678        assert!(has_rev_call, "edges_to(helper) must include caller main");
2679    }
2680
2681    // -----------------------------------------------------------------
2682    // Phase 7c cancellation wire-through tests (task 7 phase 7c)
2683    // -----------------------------------------------------------------
2684    //
2685    // The four cancellation-boundary tests below exercise the pipeline
2686    // at distinct points in `build_unified_graph_inner`:
2687    //
2688    //   1. preflight — token cancelled before the first boundary; no
2689    //      FS walk, no parse, no Phase 4 work.
2690    //   2. mid-chunk — token flipped after the first chunk commits via
2691    //      the AfterChunkHookGuard; second chunk never parses.
2692    //   3. pre-Phase-4 — token flipped after the chunk loop exits via
2693    //      the BeforePhase4HookGuard; Phase 4a+ never runs.
2694    //   4. pre-Pass-5 — token flipped before cross-language linking
2695    //      via the BeforePass5HookGuard; Pass 5 never runs.
2696    //
2697    // A fifth test confirms the backwards-compatible default path
2698    // (no cancellation arg) still returns a fully-built graph.
2699
2700    fn build_rust_test_fixture(dir: &Path, file_count: usize) {
2701        for i in 0..file_count {
2702            let path = dir.join(format!("fixture_{i}.rs"));
2703            fs::write(&path, format!("pub fn fn_{i}() {{ let _ = {i}; }}")).expect("write fixture");
2704        }
2705    }
2706
2707    fn make_rust_test_plugins() -> PluginManager {
2708        let mut plugins = PluginManager::new();
2709        plugins.register_builtin(Box::new(TestPlugin::new(
2710            "rust-noop-for-cancellation-tests",
2711            RUST_TEST_EXTENSIONS,
2712            Some(Box::new(NoopGraphBuilder)),
2713        )));
2714        plugins
2715    }
2716
2717    #[test]
2718    fn build_unified_graph_cancellable_preflight_cancellation_returns_cancelled() {
2719        let tmp = TempDir::new().expect("tmp");
2720        build_rust_test_fixture(tmp.path(), 4);
2721        let plugins = make_rust_test_plugins();
2722        let config = BuildConfig::default();
2723
2724        let cancel = CancellationToken::new();
2725        cancel.cancel();
2726
2727        let result = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel);
2728        let err = result.expect_err("pre-cancelled token must short-circuit");
2729        assert!(
2730            matches!(err, GraphBuilderError::Cancelled),
2731            "expected Cancelled, got: {err:?}"
2732        );
2733    }
2734
2735    #[test]
2736    fn build_unified_graph_cancellable_mid_chunk_cancellation_returns_cancelled() {
2737        let tmp = TempDir::new().expect("tmp");
2738        // Force multiple chunks by setting a tiny staging_memory_limit.
2739        build_rust_test_fixture(tmp.path(), 8);
2740        let plugins = make_rust_test_plugins();
2741        // A very small memory limit forces ~1 file per chunk.
2742        let config = BuildConfig {
2743            staging_memory_limit: 1,
2744            ..BuildConfig::default()
2745        };
2746
2747        let cancel = CancellationToken::new();
2748
2749        // Install a hook that cancels after the FIRST chunk. The hook
2750        // fires at the TOP of every chunk iteration (including chunk 0
2751        // before cancelling). We cancel on the first call; the next
2752        // iteration's top-of-loop `cancellation.check()` short-circuits.
2753        let cancel_for_hook = cancel.clone();
2754        let mut call_count = 0u32;
2755        let _guard = testing::AfterChunkHookGuard::install(move |tok| {
2756            call_count += 1;
2757            if call_count >= 2 {
2758                cancel_for_hook.cancel();
2759                // `tok` is the same shared Arc under the hood.
2760                assert!(tok.is_cancelled());
2761            }
2762        });
2763
2764        let result = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel);
2765        let err = result.expect_err("mid-chunk cancellation must short-circuit");
2766        assert!(
2767            matches!(err, GraphBuilderError::Cancelled),
2768            "expected Cancelled, got: {err:?}"
2769        );
2770    }
2771
2772    #[test]
2773    fn build_unified_graph_cancellable_pre_phase4_cancellation_short_circuits() {
2774        let tmp = TempDir::new().expect("tmp");
2775        build_rust_test_fixture(tmp.path(), 4);
2776        let plugins = make_rust_test_plugins();
2777        let config = BuildConfig::default();
2778
2779        let cancel = CancellationToken::new();
2780        let cancel_for_hook = cancel.clone();
2781        let _guard = testing::BeforePhase4HookGuard::install(move |_tok| {
2782            cancel_for_hook.cancel();
2783        });
2784
2785        let result = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel);
2786        let err = result.expect_err("pre-Phase-4 cancellation must short-circuit");
2787        assert!(
2788            matches!(err, GraphBuilderError::Cancelled),
2789            "expected Cancelled, got: {err:?}"
2790        );
2791    }
2792
2793    #[test]
2794    fn build_unified_graph_cancellable_pre_pass5_cancellation_short_circuits() {
2795        let tmp = TempDir::new().expect("tmp");
2796        build_rust_test_fixture(tmp.path(), 4);
2797        let plugins = make_rust_test_plugins();
2798        let config = BuildConfig::default();
2799
2800        let cancel = CancellationToken::new();
2801        let cancel_for_hook = cancel.clone();
2802        let _guard = testing::BeforePass5HookGuard::install(move |_tok| {
2803            cancel_for_hook.cancel();
2804        });
2805
2806        let result = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel);
2807        let err = result.expect_err("pre-Pass-5 cancellation must short-circuit");
2808        assert!(
2809            matches!(err, GraphBuilderError::Cancelled),
2810            "expected Cancelled, got: {err:?}"
2811        );
2812    }
2813
2814    #[test]
2815    fn build_unified_graph_default_path_is_backwards_compatible() {
2816        let tmp = TempDir::new().expect("tmp");
2817        build_rust_test_fixture(tmp.path(), 3);
2818        let plugins = make_rust_test_plugins();
2819        let config = BuildConfig::default();
2820
2821        // Legacy API: no cancellation parameter. Must return a
2822        // built graph without triggering cancellation short-circuits.
2823        // (The test plugin is a NoopGraphBuilder that produces zero
2824        // nodes; we only assert the success path returns Ok.)
2825        let _graph = build_unified_graph(tmp.path(), &plugins, &config)
2826            .expect("legacy path must still build successfully");
2827    }
2828}