1use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::graph::unified::analysis::{
12 AnalysisIdentity, GraphAnalyses, compute_manifest_hash, compute_node_id_hash,
13 resolve_label_budget_config,
14};
15use sqry_core::graph::unified::compaction::snapshot_edges;
16use sqry_core::graph::unified::persistence::GraphStorage;
17use std::time::Instant;
18
19#[derive(Debug, Serialize)]
21struct AnalysisStats {
22 node_count: u32,
24 edge_count: u32,
26 scc_stats: Vec<SccStats>,
28 build_time_secs: f64,
30}
31
32#[derive(Debug, Serialize)]
33struct SccStats {
34 edge_kind: String,
35 scc_count: u32,
36 non_trivial_count: u32,
37 max_scc_size: u32,
38}
39
40fn has_fresh_analysis(storage: &GraphStorage) -> bool {
41 let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok();
42 manifest_hash.is_some_and(|hash| {
43 ["calls", "imports", "references", "inherits"]
44 .iter()
45 .all(|kind| {
46 let scc_path = storage.analysis_scc_path(kind);
47 let cond_path = storage.analysis_cond_path(kind);
48 scc_path.exists()
49 && cond_path.exists()
50 && sqry_core::graph::unified::analysis::persistence::load_scc_manifest_checked(
51 &scc_path, &hash,
52 )
53 .is_ok()
54 && sqry_core::graph::unified::analysis::persistence::load_condensation_manifest_checked(
55 &cond_path, &hash,
56 )
57 .is_ok()
58 })
59 })
60}
61
62fn collect_analysis_stats(
63 analyses: &GraphAnalyses,
64 build_time: std::time::Duration,
65) -> AnalysisStats {
66 AnalysisStats {
67 node_count: analyses.adjacency.node_count,
68 edge_count: analyses.adjacency.edge_count,
69 scc_stats: vec![
70 SccStats {
71 edge_kind: "calls".to_string(),
72 scc_count: analyses.scc_calls.scc_count,
73 non_trivial_count: analyses.scc_calls.non_trivial_count,
74 max_scc_size: analyses.scc_calls.max_scc_size,
75 },
76 SccStats {
77 edge_kind: "imports".to_string(),
78 scc_count: analyses.scc_imports.scc_count,
79 non_trivial_count: analyses.scc_imports.non_trivial_count,
80 max_scc_size: analyses.scc_imports.max_scc_size,
81 },
82 SccStats {
83 edge_kind: "references".to_string(),
84 scc_count: analyses.scc_references.scc_count,
85 non_trivial_count: analyses.scc_references.non_trivial_count,
86 max_scc_size: analyses.scc_references.max_scc_size,
87 },
88 SccStats {
89 edge_kind: "inherits".to_string(),
90 scc_count: analyses.scc_inherits.scc_count,
91 non_trivial_count: analyses.scc_inherits.non_trivial_count,
92 max_scc_size: analyses.scc_inherits.max_scc_size,
93 },
94 ],
95 build_time_secs: build_time.as_secs_f64(),
96 }
97}
98
99#[allow(clippy::too_many_arguments)]
109#[allow(clippy::too_many_lines)] pub fn run_analyze(
111 cli: &Cli,
112 path: Option<&str>,
113 force: bool,
114 threads: Option<usize>,
115 label_budget: Option<u64>,
116 density_threshold: Option<u64>,
117 budget_exceeded_policy: Option<&str>,
118 no_labels: bool,
119) -> Result<()> {
120 let mut streams = OutputStreams::new();
121
122 let search_path = path.map_or_else(
124 || std::env::current_dir().unwrap_or_default(),
125 std::path::PathBuf::from,
126 );
127
128 let index_location = find_nearest_index(&search_path);
129 let Some(ref loc) = index_location else {
130 streams
131 .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
132 return Ok(());
133 };
134
135 streams.write_diagnostic("Building graph analyses...")?;
136
137 let config = GraphLoadConfig::default();
139 let graph = load_unified_graph_for_cli(&loc.index_root, &config, cli)
140 .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
141
142 let storage = GraphStorage::new(&loc.index_root);
146 let analysis_dir = storage.analysis_dir();
147 if !force && has_fresh_analysis(&storage) {
148 streams.write_diagnostic(
149 "Analysis files already exist and match current index. Use --force to rebuild.",
150 )?;
151 return Ok(());
152 }
153
154 let label_budget_config = resolve_label_budget_config(
156 &loc.index_root,
157 label_budget,
158 density_threshold,
159 budget_exceeded_policy,
160 no_labels,
161 )
162 .context("Failed to resolve analysis budget configuration")?;
163
164 streams.write_diagnostic("Creating compaction snapshot...")?;
166 let graph_snapshot = graph.snapshot();
167 let edges = graph_snapshot.edges();
168 let forward_store = edges.forward();
169 let node_count = graph_snapshot.nodes().len();
170 let snapshot = snapshot_edges(&forward_store, node_count);
171
172 let manifest_hash = compute_manifest_hash(storage.manifest_path())
173 .context("Failed to compute manifest hash for analysis identity")?;
174 let node_id_hash = compute_node_id_hash(&graph_snapshot);
175 let identity = AnalysisIdentity::new(manifest_hash, node_id_hash);
176
177 let phase_desc = if label_budget_config.skip_labels {
179 "CSR + SCC + Condensation (labels skipped)"
180 } else {
181 "CSR + SCC + Condensation + 2-hop labels"
182 };
183 streams.write_diagnostic(&format!("Computing analyses ({phase_desc})..."))?;
184 let start = Instant::now();
185 let analyses = if let Some(n) = threads {
186 let pool = rayon::ThreadPoolBuilder::new()
187 .num_threads(n)
188 .build()
189 .context("Failed to create rayon thread pool for analysis")?;
190 pool.install(|| GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config))
191 .context("Failed to build graph analyses")?
192 } else {
193 GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config)
194 .context("Failed to build graph analyses")?
195 };
196 let build_time = start.elapsed();
197
198 streams.write_diagnostic("Persisting analyses to disk...")?;
200 analyses
201 .persist_all(&storage, &identity)
202 .context("Failed to persist analyses")?;
203
204 let stats = collect_analysis_stats(&analyses, build_time);
205
206 if cli.json {
208 let json = serde_json::to_string_pretty(&stats).context("Failed to serialize to JSON")?;
209 streams.write_result(&json)?;
210 } else {
211 let output = format_stats_text(&stats, analysis_dir);
212 streams.write_result(&output)?;
213 }
214
215 Ok(())
216}
217
218fn format_stats_text(stats: &AnalysisStats, analysis_dir: &std::path::Path) -> String {
220 let mut lines = Vec::new();
221
222 lines.push("✓ Graph analysis complete".to_string());
223 lines.push(String::new());
224
225 lines.push(format!(
226 "Graph: {} nodes, {} edges",
227 stats.node_count, stats.edge_count
228 ));
229 lines.push(format!("Build time: {:.2}s", stats.build_time_secs));
230 lines.push(String::new());
231
232 lines.push("SCC Analysis:".to_string());
233 for scc_stat in &stats.scc_stats {
234 lines.push(format!(
235 " {}: {} SCCs ({} non-trivial, max size: {})",
236 scc_stat.edge_kind,
237 scc_stat.scc_count,
238 scc_stat.non_trivial_count,
239 scc_stat.max_scc_size
240 ));
241 }
242 lines.push(String::new());
243
244 lines.push(format!(
245 "Analysis files written to: {}",
246 analysis_dir.display()
247 ));
248 lines.push(" - adjacency.csr (CSR adjacency matrix)".to_string());
249 lines.push(
250 " - scc_calls.scc, scc_imports.scc, scc_references.scc, scc_inherits.scc".to_string(),
251 );
252 lines.push(
253 " - cond_calls.dag, cond_imports.dag, cond_references.dag, cond_inherits.dag".to_string(),
254 );
255
256 lines.join("\n")
257}
258
#[cfg(test)]
mod tests {
    //! Unit tests for the freshness check, the statistics collection and the
    //! text formatter of the analyze command.

    use super::*;
    use sqry_core::graph::unified::analysis::condensation::{
        CondensationDag, ReachabilityStrategy,
    };
    use sqry_core::graph::unified::analysis::csr::CsrAdjacency;
    use sqry_core::graph::unified::analysis::persistence::{
        AnalysisIdentity, persist_condensation, persist_scc,
    };
    use sqry_core::graph::unified::analysis::scc::SccData;
    use sqry_core::graph::unified::edge::EdgeKind;
    use sqry_core::graph::unified::persistence::GraphStorage;
    use std::time::Duration;

    /// Builds a 10-node `SccData` fixture with the requested `scc_count`.
    ///
    /// When `scc_count > 1` the fixture reports one non-trivial SCC with a
    /// maximum size of 3; otherwise it reports none and a maximum size of 1.
    fn make_scc(edge_kind: EdgeKind, scc_count: u32) -> SccData {
        SccData {
            edge_kind,
            node_count: 10,
            scc_count,
            non_trivial_count: u32::from(scc_count > 1),
            max_scc_size: if scc_count > 1 { 3 } else { 1 },
            node_to_scc: vec![0; 10],
            scc_offsets: vec![0, 10],
            scc_members: (0..10).collect(),
            has_self_loop: vec![false],
        }
    }

    /// Builds a minimal single-SCC, zero-edge `CondensationDag` fixture.
    fn make_cond(edge_kind: EdgeKind) -> CondensationDag {
        CondensationDag {
            edge_kind,
            scc_count: 1,
            edge_count: 0,
            row_offsets: vec![0, 0],
            col_indices: vec![],
            topo_order: vec![0],
            label_out_offsets: vec![0, 0],
            label_out_data: vec![],
            label_in_offsets: vec![0, 0],
            label_in_data: vec![],
            strategy: ReachabilityStrategy::DagBfs,
        }
    }

    /// The four analysed edge kinds, paired with the label used in file names.
    fn analysis_edge_kinds() -> Vec<(&'static str, EdgeKind)> {
        vec![
            (
                "calls",
                EdgeKind::Calls {
                    argument_count: 0,
                    is_async: false,
                },
            ),
            (
                "imports",
                EdgeKind::Imports {
                    alias: None,
                    is_wildcard: false,
                },
            ),
            ("references", EdgeKind::References),
            ("inherits", EdgeKind::Inherits),
        ]
    }

    /// Persists SCC and condensation fixtures for every edge kind under
    /// `root`, stamped with `manifest_hash`.
    fn write_analysis_files(root: &std::path::Path, manifest_hash: &str) {
        let storage = GraphStorage::new(root);
        let identity = AnalysisIdentity::new(manifest_hash.to_string(), [0u8; 32]);
        std::fs::create_dir_all(storage.analysis_dir()).unwrap();

        for (kind_str, edge_kind) in analysis_edge_kinds() {
            let scc = make_scc(edge_kind.clone(), 5);
            persist_scc(&scc, &identity, &storage.analysis_scc_path(kind_str)).unwrap();

            let cond = make_cond(edge_kind);
            persist_condensation(&cond, &identity, &storage.analysis_cond_path(kind_str)).unwrap();
        }
    }

    /// Writes `content` as the manifest under `root` and returns its hash.
    fn write_manifest(root: &std::path::Path, content: &str) -> String {
        let storage = GraphStorage::new(root);
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), content).unwrap();
        compute_manifest_hash(storage.manifest_path()).unwrap()
    }

    /// A manifest without any analysis files is not fresh.
    #[test]
    fn has_fresh_analysis_false_when_no_files_exist() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        write_manifest(root, r#"{"version":"1.0"}"#);

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    /// Without a manifest the hash cannot be computed, so nothing is fresh.
    #[test]
    fn has_fresh_analysis_false_when_no_manifest_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    /// All eight files present and stamped with the current hash: fresh.
    #[test]
    fn has_fresh_analysis_true_when_all_files_match() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        assert!(has_fresh_analysis(&storage));
    }

    /// Files stamped with a different hash than the manifest's are stale.
    #[test]
    fn has_fresh_analysis_false_when_manifest_hash_mismatches() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let _old_hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, "stale_hash_that_wont_match");

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    /// Removing a single SCC file is enough to invalidate freshness.
    #[test]
    fn has_fresh_analysis_false_when_one_scc_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_scc_path("imports")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    /// Removing a single condensation file is enough to invalidate freshness.
    #[test]
    fn has_fresh_analysis_false_when_one_cond_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, r#"{"version":"1.0"}"#);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_cond_path("references")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    /// Every field of the report is copied from the matching analysis field.
    #[test]
    fn collect_analysis_stats_populated() {
        let calls_kind = EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
        };
        let imports_kind = EdgeKind::Imports {
            alias: None,
            is_wildcard: false,
        };

        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 42,
                edge_count: 100,
                row_offsets: vec![],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: make_scc(calls_kind.clone(), 10),
            scc_imports: make_scc(imports_kind.clone(), 5),
            scc_references: make_scc(EdgeKind::References, 3),
            scc_inherits: make_scc(EdgeKind::Inherits, 0),
            cond_calls: make_cond(calls_kind),
            cond_imports: make_cond(imports_kind),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_millis(1234);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 42);
        assert_eq!(stats.edge_count, 100);
        assert_eq!(stats.scc_stats.len(), 4);

        assert_eq!(stats.scc_stats[0].edge_kind, "calls");
        assert_eq!(stats.scc_stats[0].scc_count, 10);
        assert_eq!(stats.scc_stats[0].non_trivial_count, 1);
        assert_eq!(stats.scc_stats[0].max_scc_size, 3);

        assert_eq!(stats.scc_stats[1].edge_kind, "imports");
        assert_eq!(stats.scc_stats[1].scc_count, 5);

        assert_eq!(stats.scc_stats[2].edge_kind, "references");
        assert_eq!(stats.scc_stats[2].scc_count, 3);

        assert_eq!(stats.scc_stats[3].edge_kind, "inherits");
        assert_eq!(stats.scc_stats[3].scc_count, 0);
        assert_eq!(stats.scc_stats[3].non_trivial_count, 0);
        assert_eq!(stats.scc_stats[3].max_scc_size, 1);

        // Compare with a tolerance: the seconds value is derived through
        // floating-point arithmetic, so exact equality would be brittle.
        assert!(
            (stats.build_time_secs - 1.234).abs() < 1e-9,
            "unexpected build time: {}",
            stats.build_time_secs
        );
    }

    /// The text report mentions every figure and the analysis directory.
    #[test]
    fn format_stats_text_contains_expected_labels() {
        let stats = AnalysisStats {
            node_count: 10,
            edge_count: 20,
            build_time_secs: 0.5,
            scc_stats: vec![
                SccStats {
                    edge_kind: "calls".to_string(),
                    scc_count: 3,
                    non_trivial_count: 1,
                    max_scc_size: 5,
                },
                SccStats {
                    edge_kind: "imports".to_string(),
                    scc_count: 2,
                    non_trivial_count: 0,
                    max_scc_size: 1,
                },
            ],
        };

        let tmp = tempfile::tempdir().unwrap();
        let analysis_dir = tmp.path().join("analysis");
        std::fs::create_dir_all(&analysis_dir).unwrap();

        let output = format_stats_text(&stats, &analysis_dir);

        assert!(
            output.contains("Graph analysis complete"),
            "Expected completion marker: {output}"
        );
        assert!(output.contains("10 nodes"), "Expected node count: {output}");
        assert!(output.contains("20 edges"), "Expected edge count: {output}");
        assert!(output.contains("0.50s"), "Expected build time: {output}");
        assert!(
            output.contains("calls"),
            "Expected calls SCC stats: {output}"
        );
        assert!(
            output.contains("imports"),
            "Expected imports SCC stats: {output}"
        );
        assert!(output.contains("3 SCCs"), "Expected SCC count: {output}");
        assert!(
            output.contains("max size: 5"),
            "Expected max SCC size: {output}"
        );
        assert!(
            output.contains(analysis_dir.to_string_lossy().as_ref()),
            "Expected analysis dir path: {output}"
        );
    }

    /// An empty SCC list still yields the header and the graph size line.
    #[test]
    fn format_stats_text_empty_scc_stats() {
        let stats = AnalysisStats {
            node_count: 0,
            edge_count: 0,
            build_time_secs: 0.0,
            scc_stats: vec![],
        };
        let tmp = tempfile::tempdir().unwrap();
        let output = format_stats_text(&stats, tmp.path());

        assert!(
            output.contains("Graph analysis complete"),
            "Missing header: {output}"
        );
        assert!(output.contains("0 nodes"), "Expected 0 nodes: {output}");
        assert!(output.contains("0 edges"), "Expected 0 edges: {output}");
    }

    /// An empty graph produces four all-zero SCC entries.
    #[test]
    fn collect_analysis_stats_empty_graph() {
        let calls_kind = EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
        };
        let imports_kind = EdgeKind::Imports {
            alias: None,
            is_wildcard: false,
        };

        let empty_scc = |kind: EdgeKind| SccData {
            edge_kind: kind,
            node_count: 0,
            scc_count: 0,
            non_trivial_count: 0,
            max_scc_size: 0,
            node_to_scc: vec![],
            scc_offsets: vec![0],
            scc_members: vec![],
            has_self_loop: vec![],
        };

        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 0,
                edge_count: 0,
                row_offsets: vec![0],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: empty_scc(calls_kind.clone()),
            scc_imports: empty_scc(imports_kind.clone()),
            scc_references: empty_scc(EdgeKind::References),
            scc_inherits: empty_scc(EdgeKind::Inherits),
            cond_calls: make_cond(calls_kind),
            cond_imports: make_cond(imports_kind),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_secs(0);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 0);
        assert_eq!(stats.edge_count, 0);
        // Guard against the loop below passing vacuously on an empty list.
        assert_eq!(stats.scc_stats.len(), 4);
        for scc_stat in &stats.scc_stats {
            assert_eq!(scc_stat.scc_count, 0);
            assert_eq!(scc_stat.non_trivial_count, 0);
            assert_eq!(scc_stat.max_scc_size, 0);
        }
    }
}