1use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli, no_op_reporter};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::graph::unified::analysis::{
12 AnalysisIdentity, GraphAnalyses, compute_manifest_hash, compute_node_id_hash,
13 resolve_label_budget_config,
14};
15use sqry_core::graph::unified::compaction::snapshot_edges;
16use sqry_core::graph::unified::persistence::GraphStorage;
17use std::time::Instant;
18
/// Summary of one analysis build.
///
/// `run_analyze` serializes this as pretty JSON when `cli.json` is set and
/// otherwise hands it to `format_stats_text`. Fields are serialized in
/// declaration order, so reordering them changes the JSON output.
#[derive(Debug, Serialize)]
struct AnalysisStats {
    /// Node count, copied from `analyses.adjacency.node_count`.
    node_count: u32,
    /// Edge count, copied from `analyses.adjacency.edge_count`.
    edge_count: u32,
    /// One entry per analysed edge kind; `collect_analysis_stats` fills this
    /// in the order calls, imports, references, inherits.
    scc_stats: Vec<SccStats>,
    /// Elapsed build time in seconds (`Duration::as_secs_f64`).
    build_time_secs: f64,
}
31
/// SCC summary counters for a single edge kind, copied verbatim from the
/// corresponding `analyses.scc_*` value by `collect_analysis_stats`.
#[derive(Debug, Serialize)]
struct SccStats {
    /// Edge-kind label: "calls", "imports", "references" or "inherits".
    edge_kind: String,
    /// Number of strongly connected components.
    scc_count: u32,
    /// Number of non-trivial components.
    /// NOTE(review): presumably components with more than one node (or a
    /// self-loop) — confirm against the SCC data definition in `sqry_core`.
    non_trivial_count: u32,
    /// Size of the largest component.
    max_scc_size: u32,
}
39
40fn has_fresh_analysis(storage: &GraphStorage) -> bool {
41 let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok();
42 manifest_hash.is_some_and(|hash| {
43 ["calls", "imports", "references", "inherits"]
44 .iter()
45 .all(|kind| {
46 let scc_path = storage.analysis_scc_path(kind);
47 let cond_path = storage.analysis_cond_path(kind);
48 scc_path.exists()
49 && cond_path.exists()
50 && sqry_core::graph::unified::analysis::persistence::load_scc_manifest_checked(
51 &scc_path, &hash,
52 )
53 .is_ok()
54 && sqry_core::graph::unified::analysis::persistence::load_condensation_manifest_checked(
55 &cond_path, &hash,
56 )
57 .is_ok()
58 })
59 })
60}
61
62fn collect_analysis_stats(
63 analyses: &GraphAnalyses,
64 build_time: std::time::Duration,
65) -> AnalysisStats {
66 AnalysisStats {
67 node_count: analyses.adjacency.node_count,
68 edge_count: analyses.adjacency.edge_count,
69 scc_stats: vec![
70 SccStats {
71 edge_kind: "calls".to_string(),
72 scc_count: analyses.scc_calls.scc_count,
73 non_trivial_count: analyses.scc_calls.non_trivial_count,
74 max_scc_size: analyses.scc_calls.max_scc_size,
75 },
76 SccStats {
77 edge_kind: "imports".to_string(),
78 scc_count: analyses.scc_imports.scc_count,
79 non_trivial_count: analyses.scc_imports.non_trivial_count,
80 max_scc_size: analyses.scc_imports.max_scc_size,
81 },
82 SccStats {
83 edge_kind: "references".to_string(),
84 scc_count: analyses.scc_references.scc_count,
85 non_trivial_count: analyses.scc_references.non_trivial_count,
86 max_scc_size: analyses.scc_references.max_scc_size,
87 },
88 SccStats {
89 edge_kind: "inherits".to_string(),
90 scc_count: analyses.scc_inherits.scc_count,
91 non_trivial_count: analyses.scc_inherits.non_trivial_count,
92 max_scc_size: analyses.scc_inherits.max_scc_size,
93 },
94 ],
95 build_time_secs: build_time.as_secs_f64(),
96 }
97}
98
99#[allow(clippy::too_many_arguments)]
109#[allow(clippy::too_many_lines)] pub fn run_analyze(
111 cli: &Cli,
112 path: Option<&str>,
113 force: bool,
114 threads: Option<usize>,
115 label_budget: Option<u64>,
116 density_threshold: Option<u64>,
117 budget_exceeded_policy: Option<&str>,
118 no_labels: bool,
119) -> Result<()> {
120 let mut streams = OutputStreams::new();
121
122 let search_path = path.map_or_else(
124 || std::env::current_dir().unwrap_or_default(),
125 std::path::PathBuf::from,
126 );
127
128 let index_location = find_nearest_index(&search_path);
129 let Some(ref loc) = index_location else {
130 streams
131 .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
132 return Ok(());
133 };
134
135 streams.write_diagnostic("Building graph analyses...")?;
136
137 let config = GraphLoadConfig::default();
139 let graph = load_unified_graph_for_cli(&loc.index_root, &config, cli, no_op_reporter())
140 .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
141
142 let storage = GraphStorage::new(&loc.index_root);
146 let analysis_dir = storage.analysis_dir();
147 if !force && has_fresh_analysis(&storage) {
148 streams.write_diagnostic(
149 "Analysis files already exist and match current index. Use --force to rebuild.",
150 )?;
151 return Ok(());
152 }
153
154 let label_budget_config = resolve_label_budget_config(
156 &loc.index_root,
157 label_budget,
158 density_threshold,
159 budget_exceeded_policy,
160 no_labels,
161 )
162 .context("Failed to resolve analysis budget configuration")?;
163
164 streams.write_diagnostic("Creating compaction snapshot...")?;
166 let graph_snapshot = graph.snapshot();
167 let edges = graph_snapshot.edges();
168 let forward_store = edges.forward();
169 let node_count = graph_snapshot.nodes().len();
170 let snapshot = snapshot_edges(&forward_store, node_count);
171
172 let manifest_hash = compute_manifest_hash(storage.manifest_path())
173 .context("Failed to compute manifest hash for analysis identity")?;
174 let node_id_hash = compute_node_id_hash(&graph_snapshot);
175 let identity = AnalysisIdentity::new(manifest_hash, node_id_hash);
176
177 let phase_desc = if label_budget_config.skip_labels {
179 "CSR + SCC + Condensation (labels skipped)"
180 } else {
181 "CSR + SCC + Condensation + 2-hop labels"
182 };
183 streams.write_diagnostic(&format!("Computing analyses ({phase_desc})..."))?;
184 let start = Instant::now();
185 let analyses = if let Some(n) = threads {
186 let pool = rayon::ThreadPoolBuilder::new()
187 .num_threads(n)
188 .build()
189 .context("Failed to create rayon thread pool for analysis")?;
190 pool.install(|| GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config))
191 .context("Failed to build graph analyses")?
192 } else {
193 GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config)
194 .context("Failed to build graph analyses")?
195 };
196 let build_time = start.elapsed();
197
198 streams.write_diagnostic("Persisting analyses to disk...")?;
200 analyses
201 .persist_all(&storage, &identity)
202 .context("Failed to persist analyses")?;
203
204 let stats = collect_analysis_stats(&analyses, build_time);
205
206 if cli.json {
208 let json = serde_json::to_string_pretty(&stats).context("Failed to serialize to JSON")?;
209 streams.write_result(&json)?;
210 } else {
211 let output = format_stats_text(&stats, analysis_dir);
212 streams.write_result(&output)?;
213 }
214
215 Ok(())
216}
217
218fn format_stats_text(stats: &AnalysisStats, analysis_dir: &std::path::Path) -> String {
220 let mut lines = Vec::new();
221
222 lines.push("✓ Graph analysis complete".to_string());
223 lines.push(String::new());
224
225 lines.push(format!(
226 "Graph: {} nodes, {} edges",
227 stats.node_count, stats.edge_count
228 ));
229 lines.push(format!("Build time: {:.2}s", stats.build_time_secs));
230 lines.push(String::new());
231
232 lines.push("SCC Analysis:".to_string());
233 for scc_stat in &stats.scc_stats {
234 lines.push(format!(
235 " {}: {} SCCs ({} non-trivial, max size: {})",
236 scc_stat.edge_kind,
237 scc_stat.scc_count,
238 scc_stat.non_trivial_count,
239 scc_stat.max_scc_size
240 ));
241 }
242 lines.push(String::new());
243
244 lines.push(format!(
245 "Analysis files written to: {}",
246 analysis_dir.display()
247 ));
248 lines.push(" - adjacency.csr (CSR adjacency matrix)".to_string());
249 lines.push(
250 " - scc_calls.scc, scc_imports.scc, scc_references.scc, scc_inherits.scc".to_string(),
251 );
252 lines.push(
253 " - cond_calls.dag, cond_imports.dag, cond_references.dag, cond_inherits.dag".to_string(),
254 );
255
256 lines.join("\n")
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use sqry_core::graph::unified::analysis::condensation::{
        CondensationDag, ReachabilityStrategy,
    };
    use sqry_core::graph::unified::analysis::csr::CsrAdjacency;
    use sqry_core::graph::unified::analysis::persistence::{
        AnalysisIdentity, persist_condensation, persist_scc,
    };
    use sqry_core::graph::unified::analysis::scc::SccData;
    use sqry_core::graph::unified::edge::{EdgeKind, ResolvedVia};
    use sqry_core::graph::unified::persistence::GraphStorage;
    use std::time::Duration;

    /// Fixture `Calls` edge kind shared by every test that needs one.
    fn calls_kind() -> EdgeKind {
        EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
            resolved_via: ResolvedVia::Direct,
        }
    }

    /// Fixture `Imports` edge kind shared by every test that needs one.
    fn imports_kind() -> EdgeKind {
        EdgeKind::Imports {
            alias: None,
            is_wildcard: false,
        }
    }

    /// Builds a 10-node `SccData` fixture whose summary counters depend on
    /// `scc_count`: `non_trivial_count` is 1 and `max_scc_size` is 3 when
    /// `scc_count > 1`, otherwise 0 and 1.
    ///
    /// The membership arrays always describe a single 10-node component and
    /// are intentionally not kept consistent with `scc_count`; the tests only
    /// read the summary counters.
    fn make_scc(edge_kind: EdgeKind, scc_count: u32) -> SccData {
        SccData {
            edge_kind,
            node_count: 10,
            scc_count,
            non_trivial_count: u32::from(scc_count > 1),
            max_scc_size: if scc_count > 1 { 3 } else { 1 },
            node_to_scc: vec![0; 10],
            scc_offsets: vec![0, 10],
            scc_members: (0..10).collect(),
            has_self_loop: vec![false],
        }
    }

    /// Builds a single-component, edge-free `CondensationDag` fixture.
    fn make_cond(edge_kind: EdgeKind) -> CondensationDag {
        CondensationDag {
            edge_kind,
            scc_count: 1,
            edge_count: 0,
            row_offsets: vec![0, 0],
            col_indices: vec![],
            topo_order: vec![0],
            label_out_offsets: vec![0, 0],
            label_out_data: vec![],
            label_in_offsets: vec![0, 0],
            label_in_data: vec![],
            strategy: ReachabilityStrategy::DagBfs,
        }
    }

    /// The four (file-name label, edge kind) pairs that `has_fresh_analysis`
    /// expects to find on disk.
    fn analysis_edge_kinds() -> Vec<(&'static str, EdgeKind)> {
        vec![
            ("calls", calls_kind()),
            ("imports", imports_kind()),
            ("references", EdgeKind::References),
            ("inherits", EdgeKind::Inherits),
        ]
    }

    /// Persists an SCC file and a condensation file for every edge kind under
    /// `root`, stamped with `manifest_hash` and an all-zero node-id hash.
    fn write_analysis_files(root: &std::path::Path, manifest_hash: &str) {
        let storage = GraphStorage::new(root);
        let identity = AnalysisIdentity::new(manifest_hash.to_string(), [0u8; 32]);
        std::fs::create_dir_all(storage.analysis_dir()).unwrap();

        for (kind_str, edge_kind) in analysis_edge_kinds() {
            let scc = make_scc(edge_kind.clone(), 5);
            persist_scc(&scc, &identity, &storage.analysis_scc_path(kind_str)).unwrap();

            let cond = make_cond(edge_kind);
            persist_condensation(&cond, &identity, &storage.analysis_cond_path(kind_str)).unwrap();
        }
    }

    /// Writes `content` as the manifest under `root` (creating the graph
    /// directory first) and returns the hash `compute_manifest_hash` reports
    /// for it.
    fn write_manifest(root: &std::path::Path, content: &str) -> String {
        let storage = GraphStorage::new(root);
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), content).unwrap();
        compute_manifest_hash(storage.manifest_path()).unwrap()
    }

    #[test]
    fn has_fresh_analysis_false_when_no_files_exist() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // Manifest present, but no analysis files were ever written.
        write_manifest(root, r#"{"version":"1.0"}"#);

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_no_manifest_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // Empty directory: the manifest hash cannot be computed at all.
        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_true_when_all_files_match() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        assert!(has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_manifest_hash_mismatches() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // The files are stamped with a hash that differs from the manifest's.
        write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, "stale_hash_that_wont_match");

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_one_scc_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_scc_path("imports")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_one_cond_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_cond_path("references")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn collect_analysis_stats_populated() {
        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 42,
                edge_count: 100,
                row_offsets: vec![],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: make_scc(calls_kind(), 10),
            scc_imports: make_scc(imports_kind(), 5),
            scc_references: make_scc(EdgeKind::References, 3),
            scc_inherits: make_scc(EdgeKind::Inherits, 0),
            cond_calls: make_cond(calls_kind()),
            cond_imports: make_cond(imports_kind()),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_millis(1234);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 42);
        assert_eq!(stats.edge_count, 100);
        assert_eq!(stats.scc_stats.len(), 4);

        assert_eq!(stats.scc_stats[0].edge_kind, "calls");
        assert_eq!(stats.scc_stats[0].scc_count, 10);
        assert_eq!(stats.scc_stats[0].non_trivial_count, 1);
        assert_eq!(stats.scc_stats[0].max_scc_size, 3);

        assert_eq!(stats.scc_stats[1].edge_kind, "imports");
        assert_eq!(stats.scc_stats[1].scc_count, 5);

        assert_eq!(stats.scc_stats[2].edge_kind, "references");
        assert_eq!(stats.scc_stats[2].scc_count, 3);

        // `make_scc` reports max_scc_size 1 whenever scc_count <= 1.
        assert_eq!(stats.scc_stats[3].edge_kind, "inherits");
        assert_eq!(stats.scc_stats[3].scc_count, 0);
        assert_eq!(stats.scc_stats[3].non_trivial_count, 0);
        assert_eq!(stats.scc_stats[3].max_scc_size, 1);

        // Compare with a tolerance so the test does not hinge on the exact
        // rounding of the seconds + nanoseconds conversion.
        assert!(
            (stats.build_time_secs - 1.234).abs() < 1e-9,
            "unexpected build time: {}",
            stats.build_time_secs
        );
    }

    #[test]
    fn format_stats_text_contains_expected_labels() {
        let stats = AnalysisStats {
            node_count: 10,
            edge_count: 20,
            build_time_secs: 0.5,
            scc_stats: vec![
                SccStats {
                    edge_kind: "calls".to_string(),
                    scc_count: 3,
                    non_trivial_count: 1,
                    max_scc_size: 5,
                },
                SccStats {
                    edge_kind: "imports".to_string(),
                    scc_count: 2,
                    non_trivial_count: 0,
                    max_scc_size: 1,
                },
            ],
        };

        let tmp = tempfile::tempdir().unwrap();
        let analysis_dir = tmp.path().join("analysis");
        std::fs::create_dir_all(&analysis_dir).unwrap();

        let output = format_stats_text(&stats, &analysis_dir);

        assert!(
            output.contains("Graph analysis complete"),
            "Expected completion marker: {output}"
        );
        assert!(output.contains("10 nodes"), "Expected node count: {output}");
        assert!(output.contains("20 edges"), "Expected edge count: {output}");
        assert!(output.contains("0.50s"), "Expected build time: {output}");
        assert!(
            output.contains("calls"),
            "Expected calls SCC stats: {output}"
        );
        assert!(
            output.contains("imports"),
            "Expected imports SCC stats: {output}"
        );
        assert!(output.contains("3 SCCs"), "Expected SCC count: {output}");
        assert!(
            output.contains("max size: 5"),
            "Expected max SCC size: {output}"
        );
        assert!(
            output.contains(analysis_dir.to_string_lossy().as_ref()),
            "Expected analysis dir path: {output}"
        );
    }

    #[test]
    fn format_stats_text_empty_scc_stats() {
        let stats = AnalysisStats {
            node_count: 0,
            edge_count: 0,
            build_time_secs: 0.0,
            scc_stats: vec![],
        };
        let tmp = tempfile::tempdir().unwrap();
        let output = format_stats_text(&stats, tmp.path());

        assert!(
            output.contains("Graph analysis complete"),
            "Missing header: {output}"
        );
        assert!(output.contains("0 nodes"), "Expected 0 nodes: {output}");
        assert!(output.contains("0 edges"), "Expected 0 edges: {output}");
    }

    #[test]
    fn collect_analysis_stats_empty_graph() {
        let empty_scc = |kind: EdgeKind| SccData {
            edge_kind: kind,
            node_count: 0,
            scc_count: 0,
            non_trivial_count: 0,
            max_scc_size: 0,
            node_to_scc: vec![],
            scc_offsets: vec![0],
            scc_members: vec![],
            has_self_loop: vec![],
        };

        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 0,
                edge_count: 0,
                row_offsets: vec![0],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: empty_scc(calls_kind()),
            scc_imports: empty_scc(imports_kind()),
            scc_references: empty_scc(EdgeKind::References),
            scc_inherits: empty_scc(EdgeKind::Inherits),
            cond_calls: make_cond(calls_kind()),
            cond_imports: make_cond(imports_kind()),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_secs(0);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 0);
        assert_eq!(stats.edge_count, 0);
        // Guard against the loop below passing vacuously on an empty list.
        assert_eq!(stats.scc_stats.len(), 4);
        for scc_stat in &stats.scc_stats {
            assert_eq!(scc_stat.scc_count, 0);
            assert_eq!(scc_stat.non_trivial_count, 0);
            assert_eq!(scc_stat.max_scc_size, 0);
        }
    }
}