1use std::fs;
7use std::path::{Path, PathBuf};
8use std::time::{Duration, Instant};
9
10use anyhow::{Context, Result};
11use ignore::WalkBuilder;
12use rayon::prelude::*;
13
14use crate::graph::GraphBuilderError;
15use crate::graph::error::GraphResult;
16use crate::graph::unified::analysis::LabelBudgetConfig;
17use crate::graph::unified::analysis::ReachabilityStrategy;
18use crate::graph::unified::build::StagingGraph;
19use crate::graph::unified::build::cancellation::CancellationToken;
20use crate::graph::unified::build::parallel_commit::{
21 GlobalOffsets, phase2_assign_ranges, phase3_parallel_commit, phase4_apply_global_remap,
22 phase4c_prime_unify_cross_file_nodes, phase4d_bulk_insert_edges,
23};
24use crate::graph::unified::build::pass3_intra::PendingEdge;
25use crate::graph::unified::build::progress::GraphBuildProgressTracker;
26use crate::graph::unified::concurrent::CodeGraph;
27use crate::io::FileReader;
28use crate::plugin::PluginManager;
29use crate::plugin::error::ParseError;
30use crate::plugin::{SafeParser, SafeParserConfig};
31use crate::progress::{SharedReporter, no_op_reporter};
32use crate::project::path_utils::normalize_path_components;
33
34#[derive(Debug, Clone)]
40pub struct BuildResult {
41 pub node_count: usize,
43 pub edge_count: usize,
46 pub raw_edge_count: usize,
49 pub file_count: std::collections::HashMap<String, usize>,
55 pub total_files: usize,
57 pub built_at: String,
59 pub root_path: String,
61 pub thread_count: usize,
66
67 pub active_plugin_ids: Vec<String>,
69
70 pub analysis_strategies: Vec<AnalysisStrategySummary>,
72}
73
74#[derive(Debug, Clone, PartialEq, Eq)]
76pub struct AnalysisStrategySummary {
77 pub edge_kind: &'static str,
79 pub strategy: ReachabilityStrategy,
81}
82
/// Fallback staging memory limit in bytes (512 MiB), used when
/// `SQRY_STAGING_MEMORY_LIMIT_MB` is unset or unparsable.
const DEFAULT_STAGING_MEMORY_LIMIT: usize = 512 << 20;
89
/// Directory names that the source walk refuses to descend into by default.
///
/// Matching is by exact directory name (not by path); see
/// `is_default_excluded_source_dir` for the environment override.
const DEFAULT_EXCLUDED_SOURCE_DIRS: &[&str] = &[
    // Dot-prefixed directories.
    ".git",
    ".hg",
    ".svn",
    ".cache",
    ".next",
    ".nuxt",
    ".sqry",
    ".turbo",
    ".venv",
    // Underscore-prefixed directories.
    "__pycache__",
    "_actions",
    "_update",
    "_work",
    // Plainly named directories.
    "build",
    "dist",
    "node_modules",
    "target",
    "vendor",
    "venv",
];
120
/// Directory-name prefixes excluded from the source walk by default; any
/// directory whose name starts with one of these is skipped.
const DEFAULT_EXCLUDED_SOURCE_DIR_PREFIXES: &[&str] = &[
    "externals.",
];
122
123#[derive(Debug, Clone)]
125pub struct BuildConfig {
126 pub max_depth: Option<usize>,
128
129 pub follow_links: bool,
131
132 pub include_hidden: bool,
134
135 pub num_threads: Option<usize>,
137
138 pub staging_memory_limit: usize,
147
148 pub label_budget: LabelBudgetConfig,
153}
154
155impl Default for BuildConfig {
156 fn default() -> Self {
157 let limit = std::env::var("SQRY_STAGING_MEMORY_LIMIT_MB")
158 .ok()
159 .and_then(|v| v.parse::<usize>().ok())
160 .map_or(DEFAULT_STAGING_MEMORY_LIMIT, |mb| mb * 1024 * 1024);
161
162 let label_budget = LabelBudgetConfig {
163 budget_per_kind: 15_000_000,
164 on_exceeded: crate::graph::unified::analysis::BudgetExceededPolicy::Degrade,
165 density_gate_threshold: 64,
166 skip_labels: false,
167 };
168
169 Self {
170 max_depth: None,
171 follow_links: false,
172 include_hidden: false,
173 num_threads: None,
174 staging_memory_limit: limit,
175 label_budget,
176 }
177 }
178}
179
180fn create_thread_pool(config: &BuildConfig) -> Result<rayon::ThreadPool> {
182 let mut builder = rayon::ThreadPoolBuilder::new();
183 if let Some(n) = config.num_threads {
184 builder = builder.num_threads(n);
185 }
186 builder
187 .build()
188 .context("Failed to create rayon thread pool for parallel indexing")
189}
190
191fn compute_parse_chunks(
200 files: &[PathBuf],
201 _pool: &rayon::ThreadPool,
202 _plugins: &PluginManager,
203 memory_limit: usize,
204) -> Vec<std::ops::Range<usize>> {
205 const EXPANSION_FACTOR: usize = 4;
209
210 let mut chunks = Vec::new();
211 let mut chunk_start = 0;
212 let mut chunk_estimate = 0usize;
213
214 for (i, path) in files.iter().enumerate() {
215 #[allow(clippy::cast_possible_truncation)] let file_size = std::fs::metadata(path)
217 .map(|m| m.len() as usize)
218 .unwrap_or(0);
219 let estimated_staging = file_size * EXPANSION_FACTOR;
220
221 if chunk_estimate + estimated_staging > memory_limit && i > chunk_start {
224 chunks.push(chunk_start..i);
225 chunk_start = i;
226 chunk_estimate = 0;
227 }
228 chunk_estimate += estimated_staging;
229 }
230
231 if chunk_start < files.len() {
233 chunks.push(chunk_start..files.len());
234 }
235
236 if chunks.len() > 1 {
237 log::info!(
238 "Memory-bounded chunking: {} batches for {} files (limit: {} MB)",
239 chunks.len(),
240 files.len(),
241 memory_limit / (1024 * 1024),
242 );
243 }
244
245 chunks
246}
247
/// Public label for the file-processing phase of a graph build.
pub const GRAPH_FILE_PROCESSING_PHASE: &'static str = "File processing";
250
251pub fn build_unified_graph(
289 root: &Path,
290 plugins: &PluginManager,
291 config: &BuildConfig,
292) -> Result<CodeGraph> {
293 build_unified_graph_cancellable(root, plugins, config, &CancellationToken::default())
294 .map_err(anyhow::Error::from)
295}
296
297pub fn build_unified_graph_with_progress(
318 root: &Path,
319 plugins: &PluginManager,
320 config: &BuildConfig,
321 progress: SharedReporter,
322) -> Result<(CodeGraph, usize)> {
323 build_unified_graph_with_progress_cancellable(
324 root,
325 plugins,
326 config,
327 progress,
328 &CancellationToken::default(),
329 )
330 .map_err(anyhow::Error::from)
331}
332
333pub fn build_unified_graph_cancellable(
350 root: &Path,
351 plugins: &PluginManager,
352 config: &BuildConfig,
353 cancellation: &CancellationToken,
354) -> GraphResult<CodeGraph> {
355 let (graph, _effective_threads) =
356 build_unified_graph_inner(root, plugins, config, no_op_reporter(), cancellation)?;
357 Ok(graph)
358}
359
360pub fn build_unified_graph_with_progress_cancellable(
370 root: &Path,
371 plugins: &PluginManager,
372 config: &BuildConfig,
373 progress: SharedReporter,
374 cancellation: &CancellationToken,
375) -> GraphResult<(CodeGraph, usize)> {
376 build_unified_graph_inner(root, plugins, config, progress, cancellation)
377}
378
379#[allow(clippy::too_many_lines)] fn build_unified_graph_inner(
390 root: &Path,
391 plugins: &PluginManager,
392 config: &BuildConfig,
393 progress: SharedReporter,
394 cancellation: &CancellationToken,
395) -> GraphResult<(CodeGraph, usize)> {
396 if !root.exists() {
397 return Err(GraphBuilderError::Internal {
398 reason: format!("Path {} does not exist", root.display()),
399 });
400 }
401
402 log::info!(
403 "Building unified graph from source files in {}",
404 root.display()
405 );
406
407 cancellation.check()?;
409
410 let has_graph_builders = plugins
411 .plugins()
412 .iter()
413 .any(|plugin| plugin.graph_builder().is_some());
414 if !has_graph_builders {
415 return Err(GraphBuilderError::Internal {
416 reason: "No graph builders registered – cannot build code graph".to_string(),
417 });
418 }
419
420 let tracker = GraphBuildProgressTracker::new(progress);
422
423 let mut files = find_source_files(root, config);
425 sort_files_for_build(root, &mut files);
426
427 cancellation.check()?;
430
431 let mut graph = CodeGraph::new();
433
434 let pool = create_thread_pool(config).map_err(|e| GraphBuilderError::Internal {
436 reason: format!("thread pool: {e}"),
437 })?;
438 let effective_threads = pool.current_num_threads();
439 log::info!("Parallel indexing: using {effective_threads} threads");
440
441 let total_files = files.len();
455 tracker.start_phase(
456 1,
457 "Chunked structural indexing (parse -> range-plan -> semantic commit)",
458 total_files,
459 );
460
461 let (mut succeeded, mut parse_errors, mut skipped, mut timed_out) =
462 (0usize, 0usize, 0usize, 0usize);
463 let mut total_staging_bytes = 0usize;
464 let mut peak_chunk_staging_bytes = 0usize;
465 let mut max_file_staging_bytes = 0usize;
466
467 let initial_string_offset = graph.strings_mut().alloc_range(0).unwrap_or(1);
470 let mut offsets = GlobalOffsets {
471 node_offset: u32::try_from(graph.nodes().slot_count()).unwrap_or(0),
472 string_offset: initial_string_offset,
473 };
474 let mut all_edges: Vec<Vec<PendingEdge>> = Vec::new();
476
477 let chunks = compute_parse_chunks(&files, &pool, plugins, config.staging_memory_limit);
478 for chunk_range in chunks {
479 cancellation.check()?;
481
482 let chunk_files = &files[chunk_range];
483
484 #[cfg(any(test, feature = "rebuild-internals"))]
489 testing::fire_after_chunk_hook(cancellation);
490
491 let staged_results: Vec<(PathBuf, Result<ParsedFileOutcome>)> = pool.install(|| {
493 chunk_files
494 .par_iter()
495 .map(|path| {
496 let result = parse_file(path.as_path(), plugins);
497 tracker.increment_progress();
498 (path.clone(), result)
499 })
500 .collect()
501 });
502
503 let mut chunk_parsed: Vec<(PathBuf, ParsedFile)> = Vec::new();
505 let mut chunk_staging_bytes = 0usize;
506 for (path, result) in staged_results {
507 match result {
508 Ok(ParsedFileOutcome::Parsed(parsed)) => {
509 let file_bytes = parsed.staging.estimated_byte_size();
510 total_staging_bytes += file_bytes;
511 chunk_staging_bytes += file_bytes;
512 if file_bytes > max_file_staging_bytes {
513 max_file_staging_bytes = file_bytes;
514 }
515 chunk_parsed.push((path, parsed));
516 }
517 Ok(ParsedFileOutcome::Skipped) => skipped += 1,
518 Ok(ParsedFileOutcome::TimedOut {
519 file,
520 phase,
521 timeout_ms,
522 }) => {
523 timed_out += 1;
524 log::warn!(
525 "Timed out building graph for {} during {} after {} ms",
526 file.display(),
527 phase,
528 timeout_ms,
529 );
530 }
531 Err(e) => {
532 parse_errors += 1;
533 log::warn!("Failed to parse {}: {e}", path.display());
534 }
535 }
536 }
537 if chunk_staging_bytes > peak_chunk_staging_bytes {
538 peak_chunk_staging_bytes = chunk_staging_bytes;
539 }
540
541 if chunk_parsed.is_empty() {
542 continue;
543 }
544
545 let file_info: Vec<_> = chunk_parsed
547 .iter()
548 .map(|(path, parsed)| (path.clone(), Some(parsed.language)))
549 .collect();
550 let file_ids = graph.files_mut().register_batch(&file_info).map_err(|e| {
551 GraphBuilderError::Internal {
552 reason: format!("Failed to register files: {e}"),
553 }
554 })?;
555
556 let staging_refs: Vec<_> = chunk_parsed.iter().map(|(_, p)| &p.staging).collect();
558 let plan = phase2_assign_ranges(&staging_refs, &file_ids, &offsets);
559
560 let placeholder = crate::graph::unified::storage::NodeEntry::new(
562 crate::graph::unified::node::NodeKind::Other,
563 crate::graph::unified::string::StringId::new(0),
564 crate::graph::unified::file::FileId::new(0),
565 );
566 graph
567 .nodes_mut()
568 .alloc_range(plan.total_nodes, &placeholder)
569 .map_err(|e| GraphBuilderError::Internal {
570 reason: format!("Failed to alloc node range: {e:?}"),
571 })?;
572 graph
573 .strings_mut()
574 .alloc_range(plan.total_strings)
575 .map_err(|e| GraphBuilderError::Internal {
576 reason: format!("Failed to alloc string range: {e}"),
577 })?;
578
579 let phase3 = pool.install(|| phase3_parallel_commit(&plan, &staging_refs, &mut graph));
588
589 let expected_nodes = plan.total_nodes as usize;
593 let expected_strings = plan.total_strings as usize;
594 let expected_edges = usize::try_from(plan.total_edges)
595 .unwrap_or_else(|_| unreachable!("edge count does not fit usize"));
596 if phase3.total_nodes_written != expected_nodes
597 || phase3.total_strings_written != expected_strings
598 || phase3.total_edges_collected != expected_edges
599 {
600 return Err(GraphBuilderError::Internal {
601 reason: format!(
602 "Phase 3 count mismatch: nodes {}/{expected_nodes}, strings {}/{expected_strings}, edges {}/{expected_edges}. This indicates a bug in StagingGraph counting.",
603 phase3.total_nodes_written,
604 phase3.total_strings_written,
605 phase3.total_edges_collected,
606 ),
607 });
608 }
609
610 for fp in &plan.file_plans {
612 let start = fp.node_range.start;
613 let count = fp.node_range.end.saturating_sub(start);
614 graph
615 .file_segments_mut()
616 .record_range(fp.file_id, start, count);
617 }
618
619 debug_assert_eq!(
632 phase3.per_file_node_ids.len(),
633 plan.file_plans.len(),
634 "phase3 per-file node ID vector length must match plan length"
635 );
636 for (fp, node_ids) in plan.file_plans.iter().zip(phase3.per_file_node_ids.iter()) {
637 for nid in node_ids {
638 graph.files_mut().record_node(fp.file_id, *nid);
639 }
640 }
641
642 succeeded += chunk_parsed.len();
643
644 for (_path, parsed) in &mut chunk_parsed {
646 if let Some(confidence) = parsed.staging.take_confidence() {
647 let language_name = parsed.language.to_string();
648 graph.merge_confidence(&language_name, confidence);
649 }
650 }
651
652 offsets.node_offset += plan.total_nodes;
654 offsets.string_offset += plan.total_strings;
655
656 cancellation.check()?;
659
660 all_edges.extend(phase3.per_file_edges);
662 }
663 tracker.complete_phase();
664
665 #[cfg(any(test, feature = "rebuild-internals"))]
669 testing::fire_before_phase4_hook(cancellation);
670
671 tracker.start_phase(4, "Finalizing graph", 5);
673
674 cancellation.check()?;
676
677 let string_remap = graph.strings_mut().build_dedup_table();
679 if !string_remap.is_empty() {
680 log::debug!(
681 "Phase 4a: dedup removed {} duplicate string(s)",
682 string_remap.len()
683 );
684
685 phase4_apply_global_remap(graph.nodes_mut(), &mut all_edges, &string_remap);
687 }
688 tracker.increment_progress(); cancellation.check()?;
692
693 graph.rebuild_indices();
696 tracker.increment_progress(); cancellation.check()?;
701
702 let unification_stats = phase4c_prime_unify_cross_file_nodes(&mut graph, &mut all_edges);
708 if unification_stats.nodes_merged > 0 {
709 log::info!(
710 "Phase 4c-prime: unified {} duplicate nodes ({} candidate groups examined, \
711 {} edges rewritten, {} ms)",
712 unification_stats.nodes_merged,
713 unification_stats.candidate_pairs_examined,
714 unification_stats.edges_rewritten,
715 unification_stats.elapsed_ms,
716 );
717 cancellation.check()?;
722 graph.rebuild_indices();
724 }
725 tracker.increment_progress(); cancellation.check()?;
729
730 let _final_edge_seq = phase4d_bulk_insert_edges(&mut graph, &all_edges);
737 tracker.increment_progress(); tracker.complete_phase();
739
740 log::info!(
741 "Parallel indexing complete: {succeeded} committed, {skipped} skipped, \
742 {timed_out} timed out, {parse_errors} parse errors, \
743 ~{} MB total staged, ~{} MB peak chunk (max single file: ~{} KB)",
744 total_staging_bytes / (1024 * 1024),
745 peak_chunk_staging_bytes / (1024 * 1024),
746 max_file_staging_bytes / 1024,
747 );
748
749 let attempted = succeeded + parse_errors + timed_out;
750
751 if attempted == 0 {
752 log::warn!(
753 "No eligible source files found for graph build in {}",
754 root.display()
755 );
756 }
757
758 if attempted > 0 && succeeded == 0 {
759 return Err(GraphBuilderError::Internal {
760 reason: "All graph builds failed".to_string(),
761 });
762 }
763
764 cancellation.check()?;
766
767 tracker.start_phase(5, "Binding plane derivation", 1);
777 let binding_stats = super::phase4e_binding::derive_binding_plane(&mut graph);
778 log::info!(
779 target: "sqry_core::build",
780 "Phase 4e: {} scopes, {} aliases, {} shadows derived",
781 binding_stats.scopes,
782 binding_stats.aliases,
783 binding_stats.shadows,
784 );
785 tracker.increment_progress();
786 tracker.complete_phase();
787
788 #[cfg(any(test, feature = "rebuild-internals"))]
793 testing::fire_before_pass5_hook(cancellation);
794
795 cancellation.check()?;
797
798 tracker.start_phase(6, "Cross-language linking", 1);
800 let pass5_stats = super::pass5_cross_language::link_cross_language_edges(&mut graph);
801 if pass5_stats.total_edges_created > 0 {
802 log::info!(
803 "Pass 5: {} cross-language edges created ({} FFI, {} HTTP)",
804 pass5_stats.total_edges_created,
805 pass5_stats.ffi_edges_created,
806 pass5_stats.http_endpoints_matched,
807 );
808 }
809 tracker.increment_progress(); tracker.complete_phase();
811
812 log::info!("Built unified graph with {} nodes", graph.node_count());
813
814 super::super::publish::assert_publish_bijection(&graph);
829
830 Ok((graph, effective_threads))
831}
832
833pub fn build_and_persist_graph(
842 root: &Path,
843 plugins: &PluginManager,
844 config: &BuildConfig,
845 build_command: &str,
846) -> Result<(CodeGraph, BuildResult)> {
847 build_and_persist_graph_with_progress(
848 root,
849 plugins,
850 config,
851 build_command,
852 inferred_plugin_selection_manifest(plugins),
853 no_op_reporter(),
854 )
855}
856
857fn inferred_plugin_selection_manifest(
858 plugins: &PluginManager,
859) -> Option<crate::graph::unified::persistence::PluginSelectionManifest> {
860 let active_plugin_ids = plugins
861 .plugins()
862 .iter()
863 .map(|plugin| plugin.metadata().id.to_string())
864 .collect::<Vec<_>>();
865 if active_plugin_ids.is_empty() {
866 return None;
867 }
868
869 Some(
870 crate::graph::unified::persistence::PluginSelectionManifest {
871 active_plugin_ids,
872 high_cost_mode: None,
873 },
874 )
875}
876
877#[allow(clippy::too_many_lines, clippy::needless_pass_by_value)]
887pub fn persist_and_analyze_graph(
888 graph: CodeGraph,
889 root: &Path,
890 plugins: &PluginManager,
891 config: &BuildConfig,
892 build_command: &str,
893 plugin_selection: Option<crate::graph::unified::persistence::PluginSelectionManifest>,
894 progress: SharedReporter,
895 effective_threads: usize,
896) -> Result<(CodeGraph, BuildResult)> {
897 use crate::graph::unified::analysis::csr::CsrAdjacency;
898 use crate::graph::unified::analysis::{AnalysisIdentity, GraphAnalyses, compute_node_id_hash};
899 use crate::graph::unified::compaction::{Direction, build_compacted_csr, snapshot_edges};
900 use crate::graph::unified::persistence::manifest::write_manifest_bytes_atomic;
901 use crate::graph::unified::persistence::{
902 BuildProvenance, GraphStorage, MANIFEST_SCHEMA_VERSION, Manifest, SNAPSHOT_FORMAT_VERSION,
903 save_to_path,
904 };
905 use crate::progress::IndexProgress;
906 use chrono::Utc;
907 use sha2::{Digest, Sha256};
908
909 let storage = GraphStorage::new(root);
917 fs::create_dir_all(storage.graph_dir())
918 .with_context(|| format!("Failed to create {}", storage.graph_dir().display()))?;
919
920 if storage.exists() {
921 match fs::remove_file(storage.manifest_path()) {
927 Ok(()) => {}
928 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
929 Err(e) => {
930 return Err(e).with_context(|| {
931 format!(
932 "Failed to remove old manifest at {} — rebuild cannot proceed safely",
933 storage.manifest_path().display()
934 )
935 });
936 }
937 }
938 }
939
940 let raw_edge_count = graph.edge_count();
942 let node_count = graph.node_count();
943
944 progress.report(IndexProgress::StageStarted {
951 stage_name: "Compacting edge stores for persistence",
952 });
953 let compaction_start = std::time::Instant::now();
954
955 let forward_compaction_snapshot = {
957 let forward_store = graph.edges().forward();
958 snapshot_edges(&forward_store, node_count)
959 };
960 let reverse_compaction_snapshot = {
961 let reverse_store = graph.edges().reverse();
962 snapshot_edges(&reverse_store, node_count)
963 };
964
965 let (forward_result, reverse_result) = rayon::join(
967 || build_compacted_csr(&forward_compaction_snapshot, Direction::Forward),
968 || build_compacted_csr(&reverse_compaction_snapshot, Direction::Reverse),
969 );
970
971 let (forward_csr, _forward_build_stats) =
972 forward_result.context("Failed to build forward CSR for persistence compaction")?;
973 let (reverse_csr, _reverse_build_stats) =
974 reverse_result.context("Failed to build reverse CSR for persistence compaction")?;
975
976 drop(forward_compaction_snapshot);
978 drop(reverse_compaction_snapshot);
979
980 let adjacency = CsrAdjacency::from_csr_graph(&forward_csr);
983
984 graph
986 .edges()
987 .swap_csrs_and_clear_deltas(forward_csr, reverse_csr);
988
989 progress.report(IndexProgress::StageCompleted {
990 stage_name: "Compacting edge stores for persistence",
991 stage_duration: compaction_start.elapsed(),
992 });
993
994 progress.report(IndexProgress::SavingStarted {
996 component_name: "unified graph",
997 });
998 let save_start = std::time::Instant::now();
999
1000 save_to_path(&graph, storage.snapshot_path()).with_context(|| {
1001 format!(
1002 "Failed to save snapshot to {}",
1003 storage.snapshot_path().display()
1004 )
1005 })?;
1006
1007 progress.report(IndexProgress::SavingCompleted {
1008 component_name: "unified graph",
1009 save_duration: save_start.elapsed(),
1010 });
1011
1012 let snapshot_content =
1014 fs::read(storage.snapshot_path()).context("Failed to read snapshot for checksum")?;
1015 let snapshot_sha256 = hex::encode(Sha256::digest(&snapshot_content));
1016
1017 progress.report(IndexProgress::StageStarted {
1021 stage_name: "Computing graph analyses",
1022 });
1023 let analysis_start = std::time::Instant::now();
1024
1025 let analyses = if let Some(thread_count) = config.num_threads {
1026 rayon::ThreadPoolBuilder::new()
1027 .num_threads(thread_count)
1028 .build()
1029 .context("Failed to create rayon thread pool for graph analysis")?
1030 .install(|| {
1031 GraphAnalyses::build_all_from_adjacency_with_budget(adjacency, &config.label_budget)
1032 })
1033 } else {
1034 GraphAnalyses::build_all_from_adjacency_with_budget(adjacency, &config.label_budget)
1035 }
1036 .context("Failed to build graph analyses")?;
1037
1038 progress.report(IndexProgress::StageCompleted {
1039 stage_name: "Computing graph analyses",
1040 stage_duration: analysis_start.elapsed(),
1041 });
1042
1043 let dedup_edge_count = analyses.adjacency.edge_count as usize;
1044
1045 let analysis_strategies = vec![
1046 AnalysisStrategySummary {
1047 edge_kind: "calls",
1048 strategy: analyses.cond_calls.strategy,
1049 },
1050 AnalysisStrategySummary {
1051 edge_kind: "imports",
1052 strategy: analyses.cond_imports.strategy,
1053 },
1054 AnalysisStrategySummary {
1055 edge_kind: "references",
1056 strategy: analyses.cond_references.strategy,
1057 },
1058 AnalysisStrategySummary {
1059 edge_kind: "inherits",
1060 strategy: analyses.cond_inherits.strategy,
1061 },
1062 ];
1063
1064 let mut file_counts: std::collections::HashMap<String, usize> =
1066 std::collections::HashMap::new();
1067 for (file_id, file_path) in graph.indexed_files() {
1068 if graph.files().is_external(file_id) {
1069 continue;
1070 }
1071 let language = plugins
1072 .plugin_for_path(file_path)
1073 .map_or_else(|| "unknown".to_string(), |p| p.metadata().id.to_string());
1074 *file_counts.entry(language).or_insert(0) += 1;
1075 }
1076 let total_files: usize = file_counts.values().sum();
1077
1078 let built_at = Utc::now().to_rfc3339();
1080
1081 let manifest = Manifest {
1082 schema_version: MANIFEST_SCHEMA_VERSION,
1083 snapshot_format_version: SNAPSHOT_FORMAT_VERSION,
1084 built_at: built_at.clone(),
1085 root_path: root.to_string_lossy().to_string(),
1086 node_count,
1087 edge_count: dedup_edge_count,
1088 raw_edge_count: Some(raw_edge_count),
1089 snapshot_sha256,
1090 build_provenance: BuildProvenance {
1091 sqry_version: env!("CARGO_PKG_VERSION").to_string(),
1092 build_timestamp: built_at.clone(),
1093 build_command: build_command.to_string(),
1094 plugin_hashes: std::collections::HashMap::default(),
1095 },
1096 file_count: file_counts.clone(),
1097 languages: Vec::default(),
1098 config: std::collections::HashMap::default(),
1099 confidence: graph.confidence().clone(),
1100 last_indexed_commit: get_git_head_commit(root),
1101 plugin_selection: plugin_selection.clone(),
1102 };
1103
1104 let manifest_bytes =
1106 serde_json::to_vec_pretty(&manifest).context("Failed to serialize manifest")?;
1107
1108 let manifest_hash = {
1109 let mut hasher = Sha256::new();
1110 hasher.update(&manifest_bytes);
1111 hex::encode(hasher.finalize())
1112 };
1113
1114 let snapshot = graph.snapshot();
1116 let node_id_hash = compute_node_id_hash(&snapshot);
1117 let identity = AnalysisIdentity::new(manifest_hash, node_id_hash);
1118
1119 fs::create_dir_all(storage.analysis_dir()).with_context(|| {
1120 format!(
1121 "Failed to create analysis directory at {}",
1122 storage.analysis_dir().display()
1123 )
1124 })?;
1125
1126 progress.report(IndexProgress::SavingStarted {
1127 component_name: "graph analyses",
1128 });
1129
1130 analyses
1131 .persist_all(&storage, &identity)
1132 .context("Failed to persist graph analyses")?;
1133
1134 log::info!(
1135 "Graph analyses persisted to {}",
1136 storage.analysis_dir().display()
1137 );
1138
1139 progress.report(IndexProgress::SavingCompleted {
1140 component_name: "graph analyses",
1141 save_duration: analysis_start.elapsed(),
1142 });
1143
1144 write_manifest_bytes_atomic(storage.manifest_path(), &manifest_bytes).with_context(|| {
1146 format!(
1147 "Failed to save manifest to {}",
1148 storage.manifest_path().display()
1149 )
1150 })?;
1151
1152 log::info!(
1153 "Manifest saved to {} (dedup edges: {}, raw edges: {})",
1154 storage.manifest_path().display(),
1155 dedup_edge_count,
1156 raw_edge_count
1157 );
1158
1159 let build_result = BuildResult {
1160 node_count,
1161 edge_count: dedup_edge_count,
1162 raw_edge_count,
1163 file_count: file_counts,
1164 total_files,
1165 built_at,
1166 root_path: root.to_string_lossy().to_string(),
1167 thread_count: effective_threads,
1168 active_plugin_ids: plugin_selection
1169 .map_or_else(Vec::new, |selection| selection.active_plugin_ids),
1170 analysis_strategies,
1171 };
1172
1173 Ok((graph, build_result))
1174}
1175
1176#[allow(clippy::too_many_lines, clippy::needless_pass_by_value)]
1200pub fn build_and_persist_graph_with_progress(
1201 root: &Path,
1202 plugins: &PluginManager,
1203 config: &BuildConfig,
1204 build_command: &str,
1205 plugin_selection: Option<crate::graph::unified::persistence::PluginSelectionManifest>,
1206 progress: SharedReporter,
1207) -> Result<(CodeGraph, BuildResult)> {
1208 let (graph, effective_threads) = build_unified_graph_inner(
1209 root,
1210 plugins,
1211 config,
1212 progress.clone(),
1213 &CancellationToken::default(),
1214 )
1215 .map_err(anyhow::Error::from)?;
1216 persist_and_analyze_graph(
1217 graph,
1218 root,
1219 plugins,
1220 config,
1221 build_command,
1222 plugin_selection,
1223 progress,
1224 effective_threads,
1225 )
1226}
1227
/// Returns the commit hash `git rev-parse HEAD` prints for the repository at `path`.
///
/// Yields `None` when `git` cannot be spawned, the command exits
/// unsuccessfully, or the trimmed output is not exactly 40 ASCII hex digits.
#[must_use]
pub fn get_git_head_commit(path: &Path) -> Option<String> {
    let mut command = std::process::Command::new("git");
    command.arg("-C").arg(path).args(["rev-parse", "HEAD"]);

    let output = command.output().ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let candidate = stdout.trim();
    // Byte-wise check: any non-ASCII byte fails `is_ascii_hexdigit`.
    let looks_like_sha = candidate.len() == 40 && candidate.bytes().all(|b| b.is_ascii_hexdigit());
    looks_like_sha.then(|| candidate.to_string())
}
1246
1247fn find_source_files(root: &Path, config: &BuildConfig) -> Vec<std::path::PathBuf> {
1251 let mut builder = WalkBuilder::new(root);
1252
1253 builder
1254 .follow_links(config.follow_links)
1255 .hidden(!config.include_hidden)
1256 .git_ignore(true)
1257 .git_global(true)
1258 .git_exclude(true);
1259
1260 if let Some(depth) = config.max_depth {
1261 builder.max_depth(Some(depth));
1262 }
1263
1264 if let Some(threads) = config.num_threads {
1265 builder.threads(threads);
1266 }
1267
1268 let root_for_filter = root.to_path_buf();
1269 builder.filter_entry(move |entry| {
1270 entry
1271 .file_type()
1272 .is_none_or(|file_type| !file_type.is_dir())
1273 || should_visit_source_dir(&root_for_filter, entry.path())
1274 });
1275
1276 let mut files = Vec::new();
1277
1278 for entry in builder.build() {
1279 let entry = match entry {
1280 Ok(entry) => entry,
1281 Err(err) => {
1282 log::warn!("Failed to read directory entry: {err}");
1283 continue;
1284 }
1285 };
1286
1287 if entry.file_type().is_some_and(|ft| ft.is_file()) {
1288 files.push(entry.into_path());
1289 }
1290 }
1291
1292 files
1293}
1294
1295fn should_visit_source_dir(root: &Path, path: &Path) -> bool {
1296 if path == root {
1297 return true;
1298 }
1299
1300 let Some(name) = path.file_name().and_then(|value| value.to_str()) else {
1301 return true;
1302 };
1303
1304 !is_default_excluded_source_dir(name)
1305}
1306
1307fn is_default_excluded_source_dir(name: &str) -> bool {
1308 if std::env::var("SQRY_INCLUDE_DEFAULT_EXCLUDED_DIRS")
1309 .is_ok_and(|value| value == "1" || value.eq_ignore_ascii_case("true"))
1310 {
1311 return false;
1312 }
1313
1314 DEFAULT_EXCLUDED_SOURCE_DIRS.contains(&name)
1315 || DEFAULT_EXCLUDED_SOURCE_DIR_PREFIXES
1316 .iter()
1317 .any(|prefix| name.starts_with(prefix))
1318}
1319
1320fn sort_files_for_build(root: &Path, files: &mut [PathBuf]) {
1321 let normalized_root = normalize_path_components(root);
1322 files.sort_by(|left, right| {
1323 let left_key = file_sort_key(&normalized_root, left);
1324 let right_key = file_sort_key(&normalized_root, right);
1325 left_key.cmp(&right_key).then_with(|| left.cmp(right))
1326 });
1327}
1328
1329fn file_sort_key(root: &Path, path: &Path) -> String {
1330 let normalized_path = normalize_path_components(path);
1331 let relative = normalized_path
1332 .strip_prefix(root)
1333 .unwrap_or(normalized_path.as_path());
1334 let mut key = relative.to_string_lossy().replace('\\', "/");
1335 if cfg!(windows) {
1336 key = key.to_ascii_lowercase();
1337 }
1338 key
1339}
1340
1341#[derive(Debug)]
1350pub(super) struct ParsedFile {
1351 pub(super) language: crate::graph::Language,
1353 pub(super) staging: StagingGraph,
1355}
1356
1357#[derive(Debug)]
1361pub(super) enum ParsedFileOutcome {
1362 Parsed(ParsedFile),
1363 Skipped,
1364 TimedOut {
1365 file: PathBuf,
1366 phase: &'static str,
1367 timeout_ms: u64,
1368 },
1369}
1370
1371pub(super) fn parse_file(path: &Path, plugins: &PluginManager) -> Result<ParsedFileOutcome> {
1382 let plugin = plugins.plugin_for_path(path);
1383 let Some(plugin) = plugin else {
1384 return Ok(ParsedFileOutcome::Skipped);
1385 };
1386
1387 let Some(builder) = plugin.graph_builder() else {
1388 return Ok(ParsedFileOutcome::Skipped);
1389 };
1390
1391 let reader =
1392 FileReader::open(path).with_context(|| format!("failed to read {}", path.display()))?;
1393 let raw_content = reader.as_slice();
1394
1395 let safe_parser = SafeParser::new(SafeParserConfig::new().with_max_input_size(
1396 usize::try_from(crate::config::buffers::max_source_file_size()).unwrap_or(usize::MAX),
1397 ));
1398 let prepared_content = plugin.preprocess(raw_content);
1399 let parse_content = prepared_content.as_ref();
1400 let parse_start = Instant::now();
1401 let tree = safe_parser
1402 .parse_file(&plugin.language(), parse_content, path)
1403 .map_err(|err| map_parse_error(path, err))?;
1404 let parse_duration = parse_start.elapsed();
1405 if parse_duration >= Duration::from_secs(2) {
1406 log::warn!("Slow parse ({parse_duration:.2?}): {}", path.display());
1407 }
1408
1409 let mut staging = StagingGraph::new();
1410 let build_start = Instant::now();
1411 match builder.build_graph(&tree, parse_content, path, &mut staging) {
1412 Ok(()) => {}
1413 Err(GraphBuilderError::BuildTimedOut {
1414 phase, timeout_ms, ..
1415 }) => {
1416 return Ok(ParsedFileOutcome::TimedOut {
1417 file: path.to_path_buf(),
1418 phase,
1419 timeout_ms,
1420 });
1421 }
1422 Err(err) => return Err(map_builder_error(path, &err)),
1423 }
1424 let build_duration = build_start.elapsed();
1425 if build_duration >= Duration::from_secs(2) {
1426 log::warn!(
1427 "Slow graph build ({build_duration:.2?}): {}",
1428 path.display()
1429 );
1430 }
1431
1432 staging.attach_body_hashes(raw_content);
1433
1434 Ok(ParsedFileOutcome::Parsed(ParsedFile {
1435 language: builder.language(),
1436 staging,
1437 }))
1438}
1439
1440fn map_parse_error(path: &Path, err: ParseError) -> anyhow::Error {
1441 match err {
1442 ParseError::TreeSitterFailed => {
1443 anyhow::anyhow!("tree-sitter failed to parse {}", path.display())
1444 }
1445 ParseError::LanguageSetFailed(reason) => anyhow::anyhow!(
1446 "failed to configure tree-sitter for {}: {}",
1447 path.display(),
1448 reason
1449 ),
1450 ParseError::InputTooLarge { size, max, .. } => anyhow::anyhow!(
1451 "input too large for {}: {} bytes exceeds {} byte parser limit",
1452 path.display(),
1453 size,
1454 max
1455 ),
1456 ParseError::ParseTimedOut { timeout_micros, .. } => anyhow::anyhow!(
1457 "parse timed out for {} after {} ms",
1458 path.display(),
1459 timeout_micros / 1000
1460 ),
1461 ParseError::ParseCancelled { reason, .. } => {
1462 anyhow::anyhow!("parse cancelled for {}: {}", path.display(), reason)
1463 }
1464 _ => anyhow::anyhow!("parse error in {}: {:?}", path.display(), err),
1465 }
1466}
1467
1468fn map_builder_error(path: &Path, err: &GraphBuilderError) -> anyhow::Error {
1469 anyhow::anyhow!("graph builder error in {}: {}", path.display(), err)
1470}
1471
/// Test-only hook registry for the graph build entrypoint.
///
/// Three independent, **thread-local** slots each hold at most one callback
/// that receives the build's [`CancellationToken`]. The parent module triggers
/// them through the `pub(super)` `fire_*` functions; the exact call sites are
/// outside this module (presumably the correspondingly named points of the
/// build pipeline — confirm against the callers).
///
/// Because the slots are thread-local, a hook is only observed by `fire_*`
/// calls made on the thread that installed it.
///
/// A hook runs while its slot's `RefCell` is mutably borrowed, so a hook must
/// not call the `set_*`/`clear_*` function of its own slot: that would panic
/// with a double-borrow error.
#[cfg(any(test, feature = "rebuild-internals"))]
pub mod testing {
    use super::CancellationToken;
    use std::cell::RefCell;

    /// Callback stored in the "after chunk" slot.
    pub type AfterChunkHook = Box<dyn FnMut(&CancellationToken)>;
    /// Callback stored in the "before phase 4" slot.
    pub type BeforePhase4Hook = Box<dyn FnMut(&CancellationToken)>;
    /// Callback stored in the "before pass 5" slot.
    pub type BeforePass5Hook = Box<dyn FnMut(&CancellationToken)>;

    thread_local! {
        static AFTER_CHUNK_HOOK: RefCell<Option<AfterChunkHook>> = const { RefCell::new(None) };
        static BEFORE_PHASE4_HOOK: RefCell<Option<BeforePhase4Hook>> = const { RefCell::new(None) };
        static BEFORE_PASS5_HOOK: RefCell<Option<BeforePass5Hook>> = const { RefCell::new(None) };
    }

    /// Installs `hook` in the current thread's "after chunk" slot and returns
    /// the hook that was installed before, if any.
    pub fn set_after_chunk_hook<F>(hook: F) -> Option<AfterChunkHook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        AFTER_CHUNK_HOOK.with(|cell| cell.replace(Some(Box::new(hook))))
    }

    /// Empties the current thread's "after chunk" slot, dropping any hook in it.
    pub fn clear_after_chunk_hook() {
        AFTER_CHUNK_HOOK.with(|cell| {
            let _ = cell.replace(None);
        });
    }

    /// Installs `hook` in the current thread's "before phase 4" slot and
    /// returns the hook that was installed before, if any.
    pub fn set_before_phase4_hook<F>(hook: F) -> Option<BeforePhase4Hook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        BEFORE_PHASE4_HOOK.with(|cell| cell.replace(Some(Box::new(hook))))
    }

    /// Empties the current thread's "before phase 4" slot, dropping any hook in it.
    pub fn clear_before_phase4_hook() {
        BEFORE_PHASE4_HOOK.with(|cell| {
            let _ = cell.replace(None);
        });
    }

    /// Installs `hook` in the current thread's "before pass 5" slot and
    /// returns the hook that was installed before, if any.
    pub fn set_before_pass5_hook<F>(hook: F) -> Option<BeforePass5Hook>
    where
        F: FnMut(&CancellationToken) + 'static,
    {
        BEFORE_PASS5_HOOK.with(|cell| cell.replace(Some(Box::new(hook))))
    }

    /// Empties the current thread's "before pass 5" slot, dropping any hook in it.
    pub fn clear_before_pass5_hook() {
        BEFORE_PASS5_HOOK.with(|cell| {
            let _ = cell.replace(None);
        });
    }

    /// Invokes the current thread's "after chunk" hook, if one is installed.
    pub(super) fn fire_after_chunk_hook(cancellation: &CancellationToken) {
        AFTER_CHUNK_HOOK.with(|cell| {
            // The slot stays mutably borrowed for the duration of the call.
            if let Some(hook) = cell.borrow_mut().as_mut() {
                hook(cancellation);
            }
        });
    }

    /// Invokes the current thread's "before phase 4" hook, if one is installed.
    pub(super) fn fire_before_phase4_hook(cancellation: &CancellationToken) {
        BEFORE_PHASE4_HOOK.with(|cell| {
            // The slot stays mutably borrowed for the duration of the call.
            if let Some(hook) = cell.borrow_mut().as_mut() {
                hook(cancellation);
            }
        });
    }

    /// Invokes the current thread's "before pass 5" hook, if one is installed.
    pub(super) fn fire_before_pass5_hook(cancellation: &CancellationToken) {
        BEFORE_PASS5_HOOK.with(|cell| {
            // The slot stays mutably borrowed for the duration of the call.
            if let Some(hook) = cell.borrow_mut().as_mut() {
                hook(cancellation);
            }
        });
    }

    /// RAII guard for the "after chunk" slot: the hook is cleared when the
    /// guard is dropped.
    ///
    /// Installing replaces (and discards) any hook already in the slot; the
    /// previous hook is *not* restored on drop.
    #[must_use = "the hook is cleared as soon as the guard is dropped; bind it to a named variable"]
    pub struct AfterChunkHookGuard {
        // Private field so the guard can only be created through `install`.
        _sealed: (),
    }

    impl AfterChunkHookGuard {
        /// Installs `hook` on the current thread and returns the guard that
        /// clears it again on drop.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            let _previous = set_after_chunk_hook(hook);
            Self { _sealed: () }
        }
    }

    impl Drop for AfterChunkHookGuard {
        fn drop(&mut self) {
            clear_after_chunk_hook();
        }
    }

    /// RAII guard for the "before phase 4" slot: the hook is cleared when the
    /// guard is dropped.
    ///
    /// Installing replaces (and discards) any hook already in the slot; the
    /// previous hook is *not* restored on drop.
    #[must_use = "the hook is cleared as soon as the guard is dropped; bind it to a named variable"]
    pub struct BeforePhase4HookGuard {
        // Private field so the guard can only be created through `install`.
        _sealed: (),
    }

    impl BeforePhase4HookGuard {
        /// Installs `hook` on the current thread and returns the guard that
        /// clears it again on drop.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            let _previous = set_before_phase4_hook(hook);
            Self { _sealed: () }
        }
    }

    impl Drop for BeforePhase4HookGuard {
        fn drop(&mut self) {
            clear_before_phase4_hook();
        }
    }

    /// RAII guard for the "before pass 5" slot: the hook is cleared when the
    /// guard is dropped.
    ///
    /// Installing replaces (and discards) any hook already in the slot; the
    /// previous hook is *not* restored on drop.
    #[must_use = "the hook is cleared as soon as the guard is dropped; bind it to a named variable"]
    pub struct BeforePass5HookGuard {
        // Private field so the guard can only be created through `install`.
        _sealed: (),
    }

    impl BeforePass5HookGuard {
        /// Installs `hook` on the current thread and returns the guard that
        /// clears it again on drop.
        pub fn install<F>(hook: F) -> Self
        where
            F: FnMut(&CancellationToken) + 'static,
        {
            let _previous = set_before_pass5_hook(hook);
            Self { _sealed: () }
        }
    }

    impl Drop for BeforePass5HookGuard {
        fn drop(&mut self) {
            clear_before_pass5_hook();
        }
    }
}
1660
1661#[cfg(test)]
1662mod tests {
1663 use super::*;
1664 use crate::ast::Scope;
1665 use crate::graph::{GraphBuilder, GraphBuilderError, GraphResult, Language};
1666 use crate::plugin::error::{ParseError, ScopeError};
1667 use crate::plugin::{LanguageMetadata, LanguagePlugin};
1668 use serial_test::serial;
1669 use std::fs;
1670 use std::path::{Path, PathBuf};
1671 use tempfile::TempDir;
1672 use tree_sitter::{Parser, Tree};
1673
1674 const RUST_TEST_EXTENSIONS: &[&str] = &["rs"];
1675 const FILENAME_MATCH_EXTENSIONS: &[&str] = &["rmd", "bash_profile"];
1676
1677 fn commit_parsed_file_for_test(path: &Path, mut parsed: ParsedFile, graph: &mut CodeGraph) {
1682 let file_id = graph
1683 .files_mut()
1684 .register_with_language(path, Some(parsed.language))
1685 .expect("register file");
1686 parsed.staging.apply_file_id(file_id);
1687 let string_remap = parsed
1688 .staging
1689 .commit_strings(graph.strings_mut())
1690 .expect("commit strings");
1691 parsed
1692 .staging
1693 .apply_string_remap(&string_remap)
1694 .expect("apply string remap");
1695 let node_id_mapping = parsed
1696 .staging
1697 .commit_nodes(graph.nodes_mut())
1698 .expect("commit nodes");
1699 let edges = parsed.staging.get_remapped_edges(&node_id_mapping);
1700 for edge in edges {
1701 graph.edges_mut().add_edge_with_spans(
1702 edge.source,
1703 edge.target,
1704 edge.kind.clone(),
1705 file_id,
1706 edge.spans.clone(),
1707 );
1708 }
1709 }
1710
1711 fn expect_parsed_file(outcome: ParsedFileOutcome) -> ParsedFile {
1712 match outcome {
1713 ParsedFileOutcome::Parsed(parsed) => parsed,
1714 ParsedFileOutcome::Skipped => panic!("expected parsed file, got skipped outcome"),
1715 ParsedFileOutcome::TimedOut { file, phase, .. } => {
1716 panic!(
1717 "expected parsed file, got timeout outcome for {} during {}",
1718 file.display(),
1719 phase,
1720 )
1721 }
1722 }
1723 }
1724
1725 fn parse_rust_ast(content: &[u8]) -> Result<Tree, ParseError> {
1726 let mut parser = Parser::new();
1727 let language = tree_sitter_rust::LANGUAGE.into();
1728 parser
1729 .set_language(&language)
1730 .map_err(|err| ParseError::LanguageSetFailed(err.to_string()))?;
1731 parser
1732 .parse(content, None)
1733 .ok_or(ParseError::TreeSitterFailed)
1734 }
1735
1736 struct TestPlugin {
1737 metadata: LanguageMetadata,
1738 extensions: &'static [&'static str],
1739 builder: Option<Box<dyn GraphBuilder>>,
1740 }
1741
1742 impl TestPlugin {
1743 fn new(
1744 id: &'static str,
1745 extensions: &'static [&'static str],
1746 builder: Option<Box<dyn GraphBuilder>>,
1747 ) -> Self {
1748 Self {
1749 metadata: LanguageMetadata {
1750 id,
1751 name: "Rust",
1752 version: "test",
1753 author: "sqry-core tests",
1754 description: "Test-only Rust plugin for unified graph entrypoint tests",
1755 tree_sitter_version: "0.25",
1756 },
1757 extensions,
1758 builder,
1759 }
1760 }
1761 }
1762
1763 impl LanguagePlugin for TestPlugin {
1764 fn metadata(&self) -> LanguageMetadata {
1765 self.metadata.clone()
1766 }
1767
1768 fn extensions(&self) -> &'static [&'static str] {
1769 self.extensions
1770 }
1771
1772 fn language(&self) -> tree_sitter::Language {
1773 tree_sitter_rust::LANGUAGE.into()
1774 }
1775
1776 fn parse_ast(&self, content: &[u8]) -> Result<Tree, ParseError> {
1777 parse_rust_ast(content)
1778 }
1779
1780 fn extract_scopes(
1781 &self,
1782 _tree: &Tree,
1783 _content: &[u8],
1784 _file_path: &Path,
1785 ) -> Result<Vec<Scope>, ScopeError> {
1786 Ok(Vec::new())
1787 }
1788
1789 fn graph_builder(&self) -> Option<&dyn crate::graph::GraphBuilder> {
1790 self.builder.as_deref()
1791 }
1792 }
1793
1794 struct FailingGraphBuilder;
1795
1796 impl GraphBuilder for FailingGraphBuilder {
1797 fn build_graph(
1798 &self,
1799 _tree: &Tree,
1800 _content: &[u8],
1801 _file: &Path,
1802 _staging: &mut StagingGraph,
1803 ) -> GraphResult<()> {
1804 Err(GraphBuilderError::CrossLanguageError {
1805 reason: "forced failure".to_string(),
1806 })
1807 }
1808
1809 fn language(&self) -> Language {
1810 Language::Rust
1811 }
1812 }
1813
1814 struct NoopGraphBuilder;
1815
1816 impl GraphBuilder for NoopGraphBuilder {
1817 fn build_graph(
1818 &self,
1819 _tree: &Tree,
1820 _content: &[u8],
1821 _file: &Path,
1822 _staging: &mut StagingGraph,
1823 ) -> GraphResult<()> {
1824 Ok(())
1825 }
1826
1827 fn language(&self) -> Language {
1828 Language::Rust
1829 }
1830 }
1831
1832 struct TimeoutGraphBuilder;
1833
1834 impl GraphBuilder for TimeoutGraphBuilder {
1835 fn build_graph(
1836 &self,
1837 _tree: &Tree,
1838 _content: &[u8],
1839 file: &Path,
1840 _staging: &mut StagingGraph,
1841 ) -> GraphResult<()> {
1842 Err(GraphBuilderError::BuildTimedOut {
1843 file: file.to_path_buf(),
1844 phase: "test-timeout",
1845 timeout_ms: 42,
1846 })
1847 }
1848
1849 fn language(&self) -> Language {
1850 Language::Rust
1851 }
1852 }
1853
1854 struct SelectiveTimeoutGraphBuilder;
1855
1856 impl GraphBuilder for SelectiveTimeoutGraphBuilder {
1857 fn build_graph(
1858 &self,
1859 _tree: &Tree,
1860 _content: &[u8],
1861 file: &Path,
1862 staging: &mut StagingGraph,
1863 ) -> GraphResult<()> {
1864 use crate::graph::unified::build::helper::GraphBuildHelper;
1865
1866 let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
1867 let file_name = file
1868 .file_name()
1869 .and_then(|value| value.to_str())
1870 .unwrap_or_default();
1871
1872 if file_name == "timeout.rs" {
1873 helper.add_function("timeout_partial", None, false, false);
1874 return Err(GraphBuilderError::BuildTimedOut {
1875 file: file.to_path_buf(),
1876 phase: "test-timeout",
1877 timeout_ms: 42,
1878 });
1879 }
1880
1881 helper.add_function("survivor_fn", None, false, false);
1882 Ok(())
1883 }
1884
1885 fn language(&self) -> Language {
1886 Language::Rust
1887 }
1888 }
1889
1890 #[test]
1891 fn test_build_config_default() {
1892 let config = BuildConfig::default();
1893 assert_eq!(config.max_depth, None);
1894 assert!(!config.follow_links);
1895 assert!(!config.include_hidden);
1896 assert_eq!(config.num_threads, None);
1897 }
1898
1899 #[test]
1900 #[serial]
1901 fn test_find_source_files_excludes_generated_dependency_roots() {
1902 let temp_dir = TempDir::new().expect("temp dir");
1903 let root = temp_dir.path();
1904
1905 fs::write(root.join("src.rs"), "fn src() {}").expect("write source file");
1906 for dir in [
1907 "_work",
1908 "_actions",
1909 "_update",
1910 "externals.2.334.0",
1911 "node_modules",
1912 "target",
1913 "vendor",
1914 ] {
1915 let nested = root.join(dir).join("nested");
1916 fs::create_dir_all(&nested).expect("create excluded dir");
1917 fs::write(nested.join("ignored.rs"), "fn ignored() {}")
1918 .expect("write ignored source file");
1919 }
1920 for dir in ["external_tools", "vendorized"] {
1921 let nested = root.join(dir).join("nested");
1922 fs::create_dir_all(&nested).expect("create included sibling dir");
1923 fs::write(nested.join("included.rs"), "fn included() {}")
1924 .expect("write included source file");
1925 }
1926
1927 let config = BuildConfig::default();
1928 let mut relative_files: Vec<_> = find_source_files(root, &config)
1929 .iter()
1930 .map(|path| path.strip_prefix(root).expect("strip root").to_path_buf())
1931 .collect();
1932 relative_files.sort();
1933
1934 assert_eq!(
1935 relative_files,
1936 vec![
1937 PathBuf::from("external_tools/nested/included.rs"),
1938 PathBuf::from("src.rs"),
1939 PathBuf::from("vendorized/nested/included.rs"),
1940 ]
1941 );
1942 }
1943
1944 #[test]
1945 #[serial]
1946 fn test_find_source_files_can_include_default_excluded_roots() {
1947 let temp_dir = TempDir::new().expect("temp dir");
1948 let root = temp_dir.path();
1949 let nested = root.join("vendor").join("first_party");
1950 fs::create_dir_all(&nested).expect("create vendor dir");
1951 fs::write(nested.join("included.rs"), "fn included() {}").expect("write included source");
1952
1953 unsafe {
1954 std::env::set_var("SQRY_INCLUDE_DEFAULT_EXCLUDED_DIRS", "1");
1955 }
1956 let config = BuildConfig::default();
1957 let files = find_source_files(root, &config);
1958 unsafe {
1959 std::env::remove_var("SQRY_INCLUDE_DEFAULT_EXCLUDED_DIRS");
1960 }
1961
1962 let relative_files: Vec<_> = files
1963 .iter()
1964 .map(|path| path.strip_prefix(root).expect("strip root").to_path_buf())
1965 .collect();
1966
1967 assert_eq!(
1968 relative_files,
1969 vec![PathBuf::from("vendor/first_party/included.rs")]
1970 );
1971 }
1972
1973 #[test]
1974 fn test_build_unified_graph_empty_registry_error() {
1975 let plugins = PluginManager::new();
1976 let config = BuildConfig::default();
1977 let root = std::path::Path::new(".");
1978
1979 let result = build_unified_graph(root, &plugins, &config);
1980 let err = result.expect_err("empty registry must error");
1981 assert_eq!(
1988 err.to_string(),
1989 "Internal graph builder error: No graph builders registered – cannot build code graph"
1990 );
1991 }
1992
1993 #[test]
1994 fn test_build_unified_graph_no_graph_builders_error() {
1995 let mut plugins = PluginManager::new();
1996 plugins.register_builtin(Box::new(TestPlugin::new(
1997 "rust-no-graph-builder",
1998 RUST_TEST_EXTENSIONS,
1999 None,
2000 )));
2001 let config = BuildConfig::default();
2002 let root = std::path::Path::new(".");
2003
2004 let result = build_unified_graph(root, &plugins, &config);
2005 let err = result.expect_err("no graph builders must error");
2006 assert_eq!(
2007 err.to_string(),
2008 "Internal graph builder error: No graph builders registered – cannot build code graph"
2009 );
2010 }
2011
2012 #[test]
2013 fn test_build_unified_graph_all_failures_error() {
2014 let temp_dir = TempDir::new().expect("temp dir");
2015 let file_path = temp_dir.path().join("fail.rs");
2016 fs::write(&file_path, "fn main() {}").expect("write test file");
2017
2018 let mut plugins = PluginManager::new();
2019 plugins.register_builtin(Box::new(TestPlugin::new(
2020 "rust-failing-graph-builder",
2021 RUST_TEST_EXTENSIONS,
2022 Some(Box::new(FailingGraphBuilder)),
2023 )));
2024 let config = BuildConfig::default();
2025
2026 let result = build_unified_graph(temp_dir.path(), &plugins, &config);
2027 let err = result.expect_err("all-failures must error");
2028 assert_eq!(
2029 err.to_string(),
2030 "Internal graph builder error: All graph builds failed"
2031 );
2032 }
2033
2034 #[test]
2035 fn test_parse_file_matches_uppercase_extension() {
2036 let temp_dir = TempDir::new().expect("temp dir");
2037 let file_path = temp_dir.path().join("report.Rmd");
2038 fs::write(&file_path, "fn main() {}").expect("write test file");
2039
2040 let mut plugins = PluginManager::new();
2041 plugins.register_builtin(Box::new(TestPlugin::new(
2042 "rust-filename-match",
2043 FILENAME_MATCH_EXTENSIONS,
2044 Some(Box::new(NoopGraphBuilder)),
2045 )));
2046 let mut graph = CodeGraph::new();
2047
2048 let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
2049 commit_parsed_file_for_test(&file_path, parsed, &mut graph);
2050 }
2051
2052 #[test]
2053 fn test_parse_file_matches_dotless_filename() {
2054 let temp_dir = TempDir::new().expect("temp dir");
2055 let file_path = temp_dir.path().join("bash_profile");
2056 fs::write(&file_path, "fn main() {}").expect("write test file");
2057
2058 let mut plugins = PluginManager::new();
2059 plugins.register_builtin(Box::new(TestPlugin::new(
2060 "rust-filename-match",
2061 FILENAME_MATCH_EXTENSIONS,
2062 Some(Box::new(NoopGraphBuilder)),
2063 )));
2064 let mut graph = CodeGraph::new();
2065
2066 let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
2067 commit_parsed_file_for_test(&file_path, parsed, &mut graph);
2068 }
2069
2070 #[test]
2071 fn test_parse_file_matches_pulumi_stack_filename() {
2072 let temp_dir = TempDir::new().expect("temp dir");
2073 let file_path = temp_dir.path().join("Pulumi.dev.yaml");
2074 fs::write(&file_path, "fn main() {}").expect("write test file");
2075
2076 let mut plugins = PluginManager::new();
2077 plugins.register_builtin(Box::new(TestPlugin::new(
2078 "pulumi",
2079 &["pulumi.yaml"],
2080 Some(Box::new(NoopGraphBuilder)),
2081 )));
2082 let mut graph = CodeGraph::new();
2083
2084 let parsed = expect_parsed_file(parse_file(&file_path, &plugins).expect("parse file"));
2085 commit_parsed_file_for_test(&file_path, parsed, &mut graph);
2086 }
2087
2088 #[test]
2089 fn test_parse_file_returns_timed_out_outcome() {
2090 let temp_dir = TempDir::new().expect("temp dir");
2091 let file_path = temp_dir.path().join("timeout.rs");
2092 fs::write(&file_path, "fn main() {}").expect("write test file");
2093
2094 let mut plugins = PluginManager::new();
2095 plugins.register_builtin(Box::new(TestPlugin::new(
2096 "rust-timeout",
2097 RUST_TEST_EXTENSIONS,
2098 Some(Box::new(TimeoutGraphBuilder)),
2099 )));
2100
2101 let outcome = parse_file(&file_path, &plugins).expect("parse file");
2102 match outcome {
2103 ParsedFileOutcome::TimedOut {
2104 file,
2105 phase,
2106 timeout_ms,
2107 } => {
2108 assert_eq!(file, file_path);
2109 assert_eq!(phase, "test-timeout");
2110 assert_eq!(timeout_ms, 42);
2111 }
2112 other => panic!("expected timed out outcome, got {other:?}"),
2113 }
2114 }
2115
2116 #[test]
2117 fn test_parse_file_rejects_oversized_input() {
2118 let temp_dir = TempDir::new().expect("temp dir");
2119 let file_path = temp_dir.path().join("oversized.rs");
2120 fs::write(&file_path, vec![b'a'; 1_048_577]).expect("write oversized file");
2121
2122 let mut plugins = PluginManager::new();
2123 plugins.register_builtin(Box::new(TestPlugin::new(
2124 "rust-oversized",
2125 RUST_TEST_EXTENSIONS,
2126 Some(Box::new(NoopGraphBuilder)),
2127 )));
2128
2129 unsafe {
2130 std::env::set_var("SQRY_MAX_SOURCE_FILE_SIZE", "1048576");
2131 }
2132 let err = parse_file(&file_path, &plugins).expect_err("oversized file should fail");
2133 unsafe {
2134 std::env::remove_var("SQRY_MAX_SOURCE_FILE_SIZE");
2135 }
2136
2137 let err_text = err.to_string();
2138 assert!(err_text.contains("oversized.rs"));
2139 }
2140
2141 #[test]
2142 fn test_build_unified_graph_skips_timed_out_file_without_partial_commit() {
2143 let temp_dir = TempDir::new().expect("temp dir");
2144 let ok_path = temp_dir.path().join("ok.rs");
2145 let timeout_path = temp_dir.path().join("timeout.rs");
2146 fs::write(&ok_path, "fn ok() {}").expect("write ok file");
2147 fs::write(&timeout_path, "fn timeout() {}").expect("write timeout file");
2148
2149 let mut plugins = PluginManager::new();
2150 plugins.register_builtin(Box::new(TestPlugin::new(
2151 "rust-selective-timeout",
2152 RUST_TEST_EXTENSIONS,
2153 Some(Box::new(SelectiveTimeoutGraphBuilder)),
2154 )));
2155 let config = BuildConfig::default();
2156
2157 let graph = build_unified_graph(temp_dir.path(), &plugins, &config)
2158 .expect("graph build should succeed with surviving files");
2159 let snapshot = graph.snapshot();
2160
2161 assert_eq!(snapshot.find_by_pattern("survivor_fn").len(), 1);
2162 assert!(
2163 snapshot.find_by_pattern("timeout_partial").is_empty(),
2164 "timed out file staging must not be committed"
2165 );
2166 }
2167
2168 struct SimpleGraphBuilder;
2174
2175 impl GraphBuilder for SimpleGraphBuilder {
2176 fn build_graph(
2177 &self,
2178 _tree: &Tree,
2179 _content: &[u8],
2180 file: &Path,
2181 staging: &mut StagingGraph,
2182 ) -> GraphResult<()> {
2183 use crate::graph::unified::build::helper::GraphBuildHelper;
2184
2185 let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
2186
2187 let fn1 = helper.add_function("main", None, false, false);
2189 let fn2 = helper.add_function("helper", None, false, false);
2190
2191 helper.add_call_edge(fn1, fn2);
2193
2194 Ok(())
2195 }
2196
2197 fn language(&self) -> Language {
2198 Language::Rust
2199 }
2200 }
2201
2202 #[test]
2204 fn test_build_and_persist_graph_returns_build_result() {
2205 let temp_dir = TempDir::new().expect("temp dir");
2206 let file_path = temp_dir.path().join("test.rs");
2207 fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2208
2209 let mut plugins = PluginManager::new();
2210 plugins.register_builtin(Box::new(TestPlugin::new(
2211 "rust-simple",
2212 RUST_TEST_EXTENSIONS,
2213 Some(Box::new(SimpleGraphBuilder)),
2214 )));
2215 let config = BuildConfig::default();
2216
2217 let result =
2218 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:build_result");
2219 assert!(result.is_ok(), "build_and_persist_graph should succeed");
2220
2221 let (_graph, build_result) = result.unwrap();
2222 assert!(build_result.node_count > 0, "Should have nodes");
2223 assert!(build_result.total_files > 0, "Should have indexed files");
2224 assert!(!build_result.built_at.is_empty(), "Should have timestamp");
2225 assert!(!build_result.root_path.is_empty(), "Should have root path");
2226 }
2227
2228 #[test]
2230 fn test_build_result_edge_count_le_raw() {
2231 let temp_dir = TempDir::new().expect("temp dir");
2232 let file_path = temp_dir.path().join("test.rs");
2233 fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2234
2235 let mut plugins = PluginManager::new();
2236 plugins.register_builtin(Box::new(TestPlugin::new(
2237 "rust-simple",
2238 RUST_TEST_EXTENSIONS,
2239 Some(Box::new(SimpleGraphBuilder)),
2240 )));
2241 let config = BuildConfig::default();
2242
2243 let (_graph, build_result) =
2244 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:edge_count").unwrap();
2245
2246 assert!(
2247 build_result.edge_count <= build_result.raw_edge_count,
2248 "Deduplicated edge count ({}) should be <= raw edge count ({})",
2249 build_result.edge_count,
2250 build_result.raw_edge_count
2251 );
2252 }
2253
2254 #[test]
2256 fn test_build_and_persist_graph_file_counts_use_plugins() {
2257 let temp_dir = TempDir::new().expect("temp dir");
2258 let file_path = temp_dir.path().join("test.rs");
2259 fs::write(&file_path, "fn main() {}").expect("write test file");
2260
2261 let mut plugins = PluginManager::new();
2262 plugins.register_builtin(Box::new(TestPlugin::new(
2263 "rust-simple",
2264 RUST_TEST_EXTENSIONS,
2265 Some(Box::new(SimpleGraphBuilder)),
2266 )));
2267 let config = BuildConfig::default();
2268
2269 let (_graph, build_result) =
2270 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:file_counts")
2271 .unwrap();
2272
2273 assert!(
2275 !build_result.file_count.is_empty(),
2276 "File counts should not be empty"
2277 );
2278 assert!(
2279 build_result.file_count.contains_key("rust-simple"),
2280 "File counts should use plugin ID. Got: {:?}",
2281 build_result.file_count
2282 );
2283 }
2284
2285 #[test]
2287 fn test_manifest_edge_count_is_deduplicated() {
2288 use crate::graph::unified::persistence::GraphStorage;
2289
2290 let temp_dir = TempDir::new().expect("temp dir");
2291 let file_path = temp_dir.path().join("test.rs");
2292 fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2293
2294 let mut plugins = PluginManager::new();
2295 plugins.register_builtin(Box::new(TestPlugin::new(
2296 "rust-simple",
2297 RUST_TEST_EXTENSIONS,
2298 Some(Box::new(SimpleGraphBuilder)),
2299 )));
2300 let config = BuildConfig::default();
2301
2302 let (_graph, build_result) =
2303 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:manifest_dedup")
2304 .unwrap();
2305
2306 let storage = GraphStorage::new(temp_dir.path());
2308 assert!(storage.exists(), "Manifest should exist after build");
2309
2310 let manifest = storage.load_manifest().unwrap();
2311 assert_eq!(
2312 manifest.edge_count, build_result.edge_count,
2313 "Manifest edge_count should match BuildResult (deduplicated)"
2314 );
2315 assert_eq!(
2316 manifest.raw_edge_count,
2317 Some(build_result.raw_edge_count),
2318 "Manifest raw_edge_count should match BuildResult"
2319 );
2320 }
2321
2322 #[test]
2324 fn test_build_command_provenance() {
2325 use crate::graph::unified::persistence::GraphStorage;
2326
2327 let temp_dir = TempDir::new().expect("temp dir");
2328 let file_path = temp_dir.path().join("test.rs");
2329 fs::write(&file_path, "fn main() {}").expect("write test file");
2330
2331 let mut plugins = PluginManager::new();
2332 plugins.register_builtin(Box::new(TestPlugin::new(
2333 "rust-simple",
2334 RUST_TEST_EXTENSIONS,
2335 Some(Box::new(SimpleGraphBuilder)),
2336 )));
2337 let config = BuildConfig::default();
2338
2339 build_and_persist_graph(temp_dir.path(), &plugins, &config, "cli:index").unwrap();
2340
2341 let storage = GraphStorage::new(temp_dir.path());
2342 let manifest = storage.load_manifest().unwrap();
2343 assert_eq!(
2344 manifest.build_provenance.build_command, "cli:index",
2345 "Build command provenance should match"
2346 );
2347 }
2348
2349 #[test]
2353 fn test_wrapper_infers_plugin_selection_from_manager() {
2354 use crate::graph::unified::persistence::GraphStorage;
2355
2356 let temp_dir = TempDir::new().expect("temp dir");
2357 let file_path = temp_dir.path().join("test.rs");
2358 fs::write(&file_path, "fn main() {}").expect("write test file");
2359
2360 let mut plugins = PluginManager::new();
2361 plugins.register_builtin(Box::new(TestPlugin::new(
2362 "rust-simple",
2363 RUST_TEST_EXTENSIONS,
2364 Some(Box::new(SimpleGraphBuilder)),
2365 )));
2366 let config = BuildConfig::default();
2367
2368 let (_graph, build_result) =
2369 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:wrapper_plugins")
2370 .expect("wrapper build should succeed");
2371
2372 assert_eq!(
2373 build_result.active_plugin_ids,
2374 vec!["rust-simple".to_string()],
2375 "build result should expose the inferred active plugin ids"
2376 );
2377
2378 let storage = GraphStorage::new(temp_dir.path());
2379 let manifest = storage.load_manifest().expect("manifest should load");
2380 let plugin_selection = manifest
2381 .plugin_selection
2382 .expect("wrapper should persist plugin selection metadata");
2383 assert_eq!(
2384 plugin_selection.active_plugin_ids,
2385 vec!["rust-simple".to_string()],
2386 "wrapper should persist the manager-derived plugin ids"
2387 );
2388 assert_eq!(
2389 plugin_selection.high_cost_mode, None,
2390 "wrapper-inferred plugin selection should keep high_cost_mode diagnostic-only"
2391 );
2392 }
2393
2394 #[test]
2396 fn test_analysis_identity_matches_manifest_hash() {
2397 use crate::graph::unified::analysis::persistence::load_csr;
2398 use crate::graph::unified::persistence::GraphStorage;
2399 use sha2::{Digest, Sha256};
2400
2401 let temp_dir = TempDir::new().expect("temp dir");
2402 let file_path = temp_dir.path().join("test.rs");
2403 fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");
2404
2405 let mut plugins = PluginManager::new();
2406 plugins.register_builtin(Box::new(TestPlugin::new(
2407 "rust-simple",
2408 RUST_TEST_EXTENSIONS,
2409 Some(Box::new(SimpleGraphBuilder)),
2410 )));
2411 let config = BuildConfig::default();
2412
2413 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:identity").unwrap();
2414
2415 let storage = GraphStorage::new(temp_dir.path());
2416
2417 let manifest_bytes = std::fs::read(storage.manifest_path()).unwrap();
2419 let expected_hash = hex::encode(Sha256::digest(&manifest_bytes));
2420
2421 let (_csr, identity) = load_csr(&storage.analysis_csr_path()).unwrap();
2423
2424 assert_eq!(
2425 identity.manifest_hash, expected_hash,
2426 "On-disk manifest hash should equal analysis identity hash"
2427 );
2428 }
2429
2430 #[test]
2437 fn test_old_manifest_removed_during_rebuild() {
2438 use crate::graph::unified::persistence::GraphStorage;
2439
2440 let temp_dir = tempfile::TempDir::new().unwrap();
2441 let src = temp_dir.path().join("lib.rs");
2442 std::fs::write(&src, "fn main() {}").unwrap();
2443
2444 let mut plugins = PluginManager::new();
2446 plugins.register_builtin(Box::new(TestPlugin::new(
2447 "rust-simple",
2448 RUST_TEST_EXTENSIONS,
2449 Some(Box::new(SimpleGraphBuilder)),
2450 )));
2451 let config = BuildConfig::default();
2452 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:initial").unwrap();
2453
2454 let storage = GraphStorage::new(temp_dir.path());
2455 assert!(
2456 storage.exists(),
2457 "Manifest should exist after initial build"
2458 );
2459
2460 let original_manifest = storage.load_manifest().unwrap();
2462 let original_built_at = original_manifest.built_at.clone();
2463
2464 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:rebuild").unwrap();
2466
2467 let new_manifest = storage.load_manifest().unwrap();
2469 assert_ne!(
2470 original_built_at, new_manifest.built_at,
2471 "Manifest should have been replaced with new timestamp"
2472 );
2473 assert_eq!(
2474 new_manifest.build_provenance.build_command, "test:rebuild",
2475 "Manifest should reflect the rebuild provenance"
2476 );
2477 }
2478
2479 #[test]
2493 fn test_failed_rebuild_leaves_index_not_ready() {
2494 use crate::graph::unified::persistence::GraphStorage;
2495
2496 let temp_dir = tempfile::TempDir::new().unwrap();
2497 let src = temp_dir.path().join("lib.rs");
2498 std::fs::write(&src, "fn main() {}").unwrap();
2499
2500 let mut plugins = PluginManager::new();
2502 plugins.register_builtin(Box::new(TestPlugin::new(
2503 "rust-simple",
2504 RUST_TEST_EXTENSIONS,
2505 Some(Box::new(SimpleGraphBuilder)),
2506 )));
2507 let config = BuildConfig::default();
2508 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:initial").unwrap();
2509
2510 let storage = GraphStorage::new(temp_dir.path());
2511 assert!(
2512 storage.exists(),
2513 "Manifest should exist after initial build"
2514 );
2515
2516 let analysis_dir = storage.analysis_dir().to_path_buf();
2522 std::fs::remove_dir_all(&analysis_dir).unwrap();
2523 std::fs::write(&analysis_dir, b"blocker").unwrap();
2524
2525 let result =
2527 build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:failed_rebuild");
2528
2529 std::fs::remove_file(&analysis_dir).unwrap();
2531 std::fs::create_dir_all(&analysis_dir).unwrap();
2532
2533 assert!(
2535 result.is_err(),
2536 "Rebuild should fail when analysis dir is read-only"
2537 );
2538
2539 assert!(
2541 !storage.exists(),
2542 "After failed rebuild, manifest should have been removed — index is NOT ready"
2543 );
2544
2545 assert!(
2547 storage.snapshot_exists(),
2548 "Snapshot should still exist on disk (written before failure)"
2549 );
2550 }
2551
2552 struct DuplicateCallsGraphBuilder;
2556
2557 impl GraphBuilder for DuplicateCallsGraphBuilder {
2558 fn build_graph(
2559 &self,
2560 _tree: &Tree,
2561 _content: &[u8],
2562 file: &Path,
2563 staging: &mut StagingGraph,
2564 ) -> GraphResult<()> {
2565 use crate::graph::unified::build::helper::GraphBuildHelper;
2566
2567 let mut helper = GraphBuildHelper::new(staging, file, Language::Rust);
2568 let fn1 = helper.add_function("main", None, false, false);
2569 let fn2 = helper.add_function("helper", None, false, false);
2570
2571 helper.add_call_edge(fn1, fn2);
2573 helper.add_call_edge(fn1, fn2);
2574
2575 Ok(())
2576 }
2577
2578 fn language(&self) -> Language {
2579 Language::Rust
2580 }
2581 }
2582
/// Builds and persists a graph for a single two-function Rust file, reloads
/// the snapshot from disk, and checks that both edge stores expose a CSR and
/// report an empty delta.
#[test]
fn test_persisted_snapshot_compacts_both_edge_stores_before_save() {
    use crate::graph::unified::persistence::{GraphStorage, load_from_path};

    let workspace = TempDir::new().expect("temp dir");
    let source_file = workspace.path().join("test.rs");
    fs::write(&source_file, "fn main() {} fn helper() {}").expect("write test file");

    let mut plugins = PluginManager::new();
    let rust_plugin = TestPlugin::new(
        "rust-simple",
        RUST_TEST_EXTENSIONS,
        Some(Box::new(SimpleGraphBuilder)),
    );
    plugins.register_builtin(Box::new(rust_plugin));
    let config = BuildConfig::default();

    // The returned value is kept alive (but unused) for the rest of the test.
    let _result =
        build_and_persist_graph(workspace.path(), &plugins, &config, "test:csr_compact")
            .expect("build should succeed");

    // Reload from the persisted snapshot rather than inspecting the in-memory graph.
    let storage = GraphStorage::new(workspace.path());
    let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");

    let forward_has_csr = loaded.edges().forward().csr().is_some();
    assert!(
        forward_has_csr,
        "Forward store must have CSR after persistence"
    );
    let reverse_has_csr = loaded.edges().reverse().csr().is_some();
    assert!(
        reverse_has_csr,
        "Reverse store must have CSR after persistence"
    );

    let stats = loaded.edges().stats();
    assert_eq!(
        stats.forward.delta_edge_count, 0,
        "Forward delta must be empty after persistence"
    );
    assert_eq!(
        stats.reverse.delta_edge_count, 0,
        "Reverse delta must be empty after persistence"
    );
}
2627
/// Persists a graph, reloads it from the snapshot file, and verifies that the
/// `main -> helper` call edge is visible through both `edges_from` (forward)
/// and `edges_to` (reverse) on the loaded graph.
#[test]
fn test_loaded_snapshot_edges_to_works_after_round_trip() {
    use crate::graph::unified::edge::EdgeKind;
    use crate::graph::unified::persistence::{GraphStorage, load_from_path};
    use crate::graph::unified::{
        FileScope, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
    };

    let temp_dir = TempDir::new().expect("temp dir");
    let file_path = temp_dir.path().join("test.rs");
    fs::write(&file_path, "fn main() {} fn helper() {}").expect("write test file");

    let mut plugins = PluginManager::new();
    plugins.register_builtin(Box::new(TestPlugin::new(
        "rust-simple",
        RUST_TEST_EXTENSIONS,
        Some(Box::new(SimpleGraphBuilder)),
    )));
    let config = BuildConfig::default();

    build_and_persist_graph(temp_dir.path(), &plugins, &config, "test:round_trip")
        .expect("build should succeed");

    let storage = GraphStorage::new(temp_dir.path());
    let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");

    let snapshot = loaded.snapshot();

    let main_id = match snapshot.find_symbol_candidates(&SymbolQuery {
        symbol: "main",
        file_scope: FileScope::Any,
        mode: ResolutionMode::AllowSuffixCandidates,
    }) {
        SymbolCandidateOutcome::Candidates(ids) => {
            // Guard the index so an empty candidate list fails with a clear
            // message instead of an out-of-bounds panic.
            assert!(!ids.is_empty(), "main must exist");
            ids[0]
        }
        _ => panic!("main node must exist"),
    };

    let helper_id = match snapshot.find_symbol_candidates(&SymbolQuery {
        symbol: "helper",
        file_scope: FileScope::Any,
        mode: ResolutionMode::AllowSuffixCandidates,
    }) {
        SymbolCandidateOutcome::Candidates(ids) => {
            // Same guard as for `main` above.
            assert!(!ids.is_empty(), "helper must exist");
            ids[0]
        }
        _ => panic!("helper node must exist"),
    };

    // Forward direction: outgoing edges of `main` must contain a call to `helper`.
    let forward_edges = loaded.edges().edges_from(main_id);
    let has_call = forward_edges
        .iter()
        .any(|e| e.target == helper_id && matches!(e.kind, EdgeKind::Calls { .. }));
    assert!(has_call, "Forward traversal: main should call helper");

    // Reverse direction: incoming edges of `helper` must contain a call from `main`.
    let reverse_edges = loaded.edges().edges_to(helper_id);
    let has_caller = reverse_edges
        .iter()
        .any(|e| e.source == main_id && matches!(e.kind, EdgeKind::Calls { .. }));
    assert!(
        has_caller,
        "Reverse traversal: helper should have main as caller"
    );
}
2693
/// Builds with `DuplicateCallsGraphBuilder` and checks two things: the build
/// result reports more raw edges than deduplicated edges, and the persisted
/// manifest records exactly the same two counts as the build result.
#[test]
fn test_raw_edge_count_preserved_across_pre_save_compaction() {
    use crate::graph::unified::persistence::GraphStorage;

    let workspace = TempDir::new().expect("temp dir");
    let source_file = workspace.path().join("test.rs");
    fs::write(&source_file, "fn main() {} fn helper() {}").expect("write test file");

    let mut plugins = PluginManager::new();
    let duplicating_plugin = TestPlugin::new(
        "rust-dup",
        RUST_TEST_EXTENSIONS,
        Some(Box::new(DuplicateCallsGraphBuilder)),
    );
    plugins.register_builtin(Box::new(duplicating_plugin));
    let config = BuildConfig::default();

    let (_graph, build_result) =
        build_and_persist_graph(workspace.path(), &plugins, &config, "test:raw_edge_count")
            .expect("build should succeed");

    // The builder staged a duplicate edge, so the raw count must exceed the
    // deduplicated count.
    let raw_exceeds_dedup = build_result.raw_edge_count > build_result.edge_count;
    assert!(
        raw_exceeds_dedup,
        "raw_edge_count ({}) must be > edge_count ({}) for duplicate builder",
        build_result.raw_edge_count,
        build_result.edge_count
    );

    let storage = GraphStorage::new(workspace.path());
    let manifest = storage.load_manifest().expect("manifest should load");

    // The manifest stores the raw count as an `Option`.
    let expected_raw = Some(build_result.raw_edge_count);
    assert_eq!(
        manifest.raw_edge_count, expected_raw,
        "Manifest raw_edge_count must match build result"
    );
    assert_eq!(
        manifest.edge_count, build_result.edge_count,
        "Manifest edge_count must match build result"
    );
}
2736
/// End-to-end round trip: build and persist a graph, reload the snapshot,
/// compare node/edge counts against the build result, then resolve `main` and
/// `helper` by name and query the call edge in both directions.
#[test]
fn test_build_save_load_query_round_trip_preserves_edge_queries() {
    use crate::graph::unified::edge::EdgeKind;
    use crate::graph::unified::persistence::{GraphStorage, load_from_path};
    use crate::graph::unified::{
        FileScope, ResolutionMode, SymbolCandidateOutcome, SymbolQuery,
    };

    let workspace = TempDir::new().expect("temp dir");
    let source_file = workspace.path().join("test.rs");
    fs::write(&source_file, "fn main() {} fn helper() {}").expect("write test file");

    let mut plugins = PluginManager::new();
    let rust_plugin = TestPlugin::new(
        "rust-simple",
        RUST_TEST_EXTENSIONS,
        Some(Box::new(SimpleGraphBuilder)),
    );
    plugins.register_builtin(Box::new(rust_plugin));
    let config = BuildConfig::default();

    let (_original_graph, build_result) =
        build_and_persist_graph(workspace.path(), &plugins, &config, "test:full_round_trip")
            .expect("build should succeed");

    let storage = GraphStorage::new(workspace.path());
    let loaded = load_from_path(storage.snapshot_path(), None).expect("load should succeed");

    // Counts on the reloaded graph must agree with what the build reported.
    assert_eq!(
        loaded.edge_count(),
        build_result.edge_count,
        "Loaded graph edge count must match build result dedup count"
    );
    assert_eq!(
        loaded.node_count(),
        build_result.node_count,
        "Loaded graph node count must match build result"
    );

    let snapshot = loaded.snapshot();

    // Resolve `main` by name; the first candidate is used.
    let main_query = SymbolQuery {
        symbol: "main",
        file_scope: FileScope::Any,
        mode: ResolutionMode::AllowSuffixCandidates,
    };
    let main_id = match snapshot.find_symbol_candidates(&main_query) {
        SymbolCandidateOutcome::Candidates(candidates) => {
            assert!(!candidates.is_empty(), "main must exist");
            candidates[0]
        }
        _ => panic!("main node must exist"),
    };

    // Resolve `helper` the same way.
    let helper_query = SymbolQuery {
        symbol: "helper",
        file_scope: FileScope::Any,
        mode: ResolutionMode::AllowSuffixCandidates,
    };
    let helper_id = match snapshot.find_symbol_candidates(&helper_query) {
        SymbolCandidateOutcome::Candidates(candidates) => {
            assert!(!candidates.is_empty(), "helper must exist");
            candidates[0]
        }
        _ => panic!("helper node must exist"),
    };

    // Forward query: outgoing edges of `main`.
    let outgoing = loaded.edges().edges_from(main_id);
    let has_fwd_call = outgoing
        .iter()
        .any(|edge| matches!(edge.kind, EdgeKind::Calls { .. }) && edge.target == helper_id);
    assert!(has_fwd_call, "edges_from(main) must include call to helper");

    // Reverse query: incoming edges of `helper`.
    let incoming = loaded.edges().edges_to(helper_id);
    let has_rev_call = incoming
        .iter()
        .any(|edge| matches!(edge.kind, EdgeKind::Calls { .. }) && edge.source == main_id);
    assert!(has_rev_call, "edges_to(helper) must include caller main");
}
2821
/// Writes `file_count` tiny Rust source files into `dir`.
///
/// File `i` is named `fixture_{i}.rs` and contains a single public function
/// `fn_{i}`. `dir` must already exist.
///
/// # Panics
///
/// Panics with `"write fixture"` if any file cannot be written.
fn build_rust_test_fixture(dir: &Path, file_count: usize) {
    (0..file_count).for_each(|index| {
        let target = dir.join(format!("fixture_{index}.rs"));
        let source = format!("pub fn fn_{index}() {{ let _ = {index}; }}");
        fs::write(target, source).expect("write fixture");
    });
}
2847
2848 fn make_rust_test_plugins() -> PluginManager {
2849 let mut plugins = PluginManager::new();
2850 plugins.register_builtin(Box::new(TestPlugin::new(
2851 "rust-noop-for-cancellation-tests",
2852 RUST_TEST_EXTENSIONS,
2853 Some(Box::new(NoopGraphBuilder)),
2854 )));
2855 plugins
2856 }
2857
/// A token that is already cancelled before the build starts must make
/// `build_unified_graph_cancellable` return `GraphBuilderError::Cancelled`.
#[test]
fn build_unified_graph_cancellable_preflight_cancellation_returns_cancelled() {
    let tmp = TempDir::new().expect("tmp");
    build_rust_test_fixture(tmp.path(), 4);
    let plugins = make_rust_test_plugins();
    let config = BuildConfig::default();

    // Cancel up front, before the build is ever invoked.
    let cancel = CancellationToken::new();
    cancel.cancel();

    let err = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel)
        .expect_err("pre-cancelled token must short-circuit");
    let is_cancelled = matches!(err, GraphBuilderError::Cancelled);
    assert!(is_cancelled, "expected Cancelled, got: {err:?}");
}
2875
/// Cancelling from the after-chunk hook must surface as
/// `GraphBuilderError::Cancelled`.
///
/// The hook cancels the token on its second invocation and asserts that the
/// token it is handed observes the cancellation.
#[test]
fn build_unified_graph_cancellable_mid_chunk_cancellation_returns_cancelled() {
    let tmp = TempDir::new().expect("tmp");
    build_rust_test_fixture(tmp.path(), 8);
    let plugins = make_rust_test_plugins();
    // NOTE(review): a 1-byte staging limit presumably forces the build into
    // several chunks so the hook fires more than once — confirm against the
    // chunking logic.
    let config = BuildConfig {
        staging_memory_limit: 1,
        ..BuildConfig::default()
    };

    let cancel = CancellationToken::new();

    let hook_cancel = cancel.clone();
    let mut hook_invocations = 0u32;
    let _guard = testing::AfterChunkHookGuard::install(move |tok| {
        hook_invocations += 1;
        if hook_invocations < 2 {
            // First invocation: let the build continue.
            return;
        }
        hook_cancel.cancel();
        assert!(tok.is_cancelled());
    });

    let err = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel)
        .expect_err("mid-chunk cancellation must short-circuit");
    let is_cancelled = matches!(err, GraphBuilderError::Cancelled);
    assert!(is_cancelled, "expected Cancelled, got: {err:?}");
}
2912
/// Cancelling from the before-Phase-4 hook must make the build return
/// `GraphBuilderError::Cancelled`.
#[test]
fn build_unified_graph_cancellable_pre_phase4_cancellation_short_circuits() {
    let tmp = TempDir::new().expect("tmp");
    build_rust_test_fixture(tmp.path(), 4);
    let plugins = make_rust_test_plugins();
    let config = BuildConfig::default();

    let cancel = CancellationToken::new();
    // The hook owns its own clone of the token and cancels it when invoked.
    let hook_cancel = cancel.clone();
    let _guard = testing::BeforePhase4HookGuard::install(move |_tok| {
        hook_cancel.cancel();
    });

    let err = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel)
        .expect_err("pre-Phase-4 cancellation must short-circuit");
    let is_cancelled = matches!(err, GraphBuilderError::Cancelled);
    assert!(is_cancelled, "expected Cancelled, got: {err:?}");
}
2933
/// Cancelling from the before-Pass-5 hook must make the build return
/// `GraphBuilderError::Cancelled`.
#[test]
fn build_unified_graph_cancellable_pre_pass5_cancellation_short_circuits() {
    let tmp = TempDir::new().expect("tmp");
    build_rust_test_fixture(tmp.path(), 4);
    let plugins = make_rust_test_plugins();
    let config = BuildConfig::default();

    let cancel = CancellationToken::new();
    // The hook owns its own clone of the token and cancels it when invoked.
    let hook_cancel = cancel.clone();
    let _guard = testing::BeforePass5HookGuard::install(move |_tok| {
        hook_cancel.cancel();
    });

    let err = build_unified_graph_cancellable(tmp.path(), &plugins, &config, &cancel)
        .expect_err("pre-Pass-5 cancellation must short-circuit");
    let is_cancelled = matches!(err, GraphBuilderError::Cancelled);
    assert!(is_cancelled, "expected Cancelled, got: {err:?}");
}
2954
/// Smoke test: the non-cancellable `build_unified_graph` entry point must
/// still succeed on a small Rust fixture.
#[test]
fn build_unified_graph_default_path_is_backwards_compatible() {
    let tmp = TempDir::new().expect("tmp");
    build_rust_test_fixture(tmp.path(), 3);
    let plugins = make_rust_test_plugins();
    let config = BuildConfig::default();

    let build_outcome = build_unified_graph(tmp.path(), &plugins, &config);
    // Only success matters here; the graph itself is not inspected.
    let _graph = build_outcome.expect("legacy path must still build successfully");
}
2969}