Skip to main content

sqry_cli/commands/
analyze.rs

1//! Analyze command implementation
2//!
3//! Builds precomputed graph analyses (Pass 5) for fast query-time performance.
4
5use crate::args::Cli;
6use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
7use crate::index_discovery::find_nearest_index;
8use crate::output::OutputStreams;
9use anyhow::{Context, Result};
10use serde::Serialize;
11use sqry_core::graph::unified::analysis::{
12    AnalysisIdentity, GraphAnalyses, compute_manifest_hash, compute_node_id_hash,
13    resolve_label_budget_config,
14};
15use sqry_core::graph::unified::compaction::snapshot_edges;
16use sqry_core::graph::unified::persistence::GraphStorage;
17use std::time::Instant;
18
19/// Analysis statistics for output
20#[derive(Debug, Serialize)]
21struct AnalysisStats {
22    /// Total nodes in graph
23    node_count: u32,
24    /// Total edges in graph
25    edge_count: u32,
26    /// SCC statistics per edge kind
27    scc_stats: Vec<SccStats>,
28    /// Analysis build time in seconds
29    build_time_secs: f64,
30}
31
32#[derive(Debug, Serialize)]
33struct SccStats {
34    edge_kind: String,
35    scc_count: u32,
36    non_trivial_count: u32,
37    max_scc_size: u32,
38}
39
40fn has_fresh_analysis(storage: &GraphStorage) -> bool {
41    let manifest_hash = compute_manifest_hash(storage.manifest_path()).ok();
42    manifest_hash.is_some_and(|hash| {
43        ["calls", "imports", "references", "inherits"]
44            .iter()
45            .all(|kind| {
46                let scc_path = storage.analysis_scc_path(kind);
47                let cond_path = storage.analysis_cond_path(kind);
48                scc_path.exists()
49                    && cond_path.exists()
50                    && sqry_core::graph::unified::analysis::persistence::load_scc_manifest_checked(
51                        &scc_path, &hash,
52                    )
53                    .is_ok()
54                    && sqry_core::graph::unified::analysis::persistence::load_condensation_manifest_checked(
55                        &cond_path, &hash,
56                    )
57                    .is_ok()
58            })
59    })
60}
61
62fn collect_analysis_stats(
63    analyses: &GraphAnalyses,
64    build_time: std::time::Duration,
65) -> AnalysisStats {
66    AnalysisStats {
67        node_count: analyses.adjacency.node_count,
68        edge_count: analyses.adjacency.edge_count,
69        scc_stats: vec![
70            SccStats {
71                edge_kind: "calls".to_string(),
72                scc_count: analyses.scc_calls.scc_count,
73                non_trivial_count: analyses.scc_calls.non_trivial_count,
74                max_scc_size: analyses.scc_calls.max_scc_size,
75            },
76            SccStats {
77                edge_kind: "imports".to_string(),
78                scc_count: analyses.scc_imports.scc_count,
79                non_trivial_count: analyses.scc_imports.non_trivial_count,
80                max_scc_size: analyses.scc_imports.max_scc_size,
81            },
82            SccStats {
83                edge_kind: "references".to_string(),
84                scc_count: analyses.scc_references.scc_count,
85                non_trivial_count: analyses.scc_references.non_trivial_count,
86                max_scc_size: analyses.scc_references.max_scc_size,
87            },
88            SccStats {
89                edge_kind: "inherits".to_string(),
90                scc_count: analyses.scc_inherits.scc_count,
91                non_trivial_count: analyses.scc_inherits.non_trivial_count,
92                max_scc_size: analyses.scc_inherits.max_scc_size,
93            },
94        ],
95        build_time_secs: build_time.as_secs_f64(),
96    }
97}
98
99/// Run the analyze command.
100///
101/// Builds precomputed graph analyses (CSR, SCC, Condensation DAG, 2-hop labels)
102/// and persists them to .sqry/analysis/ for fast query-time performance.
103///
104/// Analysis settings are resolved with precedence: CLI args > config file > env vars > compiled defaults.
105///
106/// # Errors
107/// Returns an error if the graph cannot be loaded or analyses cannot be built.
108#[allow(clippy::too_many_arguments)]
109#[allow(clippy::too_many_lines)] // Sequential CLI orchestration across index discovery, graph loading, analysis execution, persistence, and user-facing output.
110pub fn run_analyze(
111    cli: &Cli,
112    path: Option<&str>,
113    force: bool,
114    threads: Option<usize>,
115    label_budget: Option<u64>,
116    density_threshold: Option<u64>,
117    budget_exceeded_policy: Option<&str>,
118    no_labels: bool,
119) -> Result<()> {
120    let mut streams = OutputStreams::new();
121
122    // Find index
123    let search_path = path.map_or_else(
124        || std::env::current_dir().unwrap_or_default(),
125        std::path::PathBuf::from,
126    );
127
128    let index_location = find_nearest_index(&search_path);
129    let Some(ref loc) = index_location else {
130        streams
131            .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
132        return Ok(());
133    };
134
135    streams.write_diagnostic("Building graph analyses...")?;
136
137    // Load unified graph
138    let config = GraphLoadConfig::default();
139    let graph = load_unified_graph_for_cli(&loc.index_root, &config, cli)
140        .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
141
142    // Check if full analysis artifacts already exist AND match the current manifest.
143    // After `sqry index --force`, stale SCC/DAG files from a previous analysis may
144    // remain on disk. We validate against the manifest hash to detect this.
145    let storage = GraphStorage::new(&loc.index_root);
146    let analysis_dir = storage.analysis_dir();
147    if !force && has_fresh_analysis(&storage) {
148        streams.write_diagnostic(
149            "Analysis files already exist and match current index. Use --force to rebuild.",
150        )?;
151        return Ok(());
152    }
153
154    // Resolve analysis settings: CLI args > config file > env vars > compiled defaults
155    let label_budget_config = resolve_label_budget_config(
156        &loc.index_root,
157        label_budget,
158        density_threshold,
159        budget_exceeded_policy,
160        no_labels,
161    )
162    .context("Failed to resolve analysis budget configuration")?;
163
164    // Build compaction snapshot from graph
165    streams.write_diagnostic("Creating compaction snapshot...")?;
166    let graph_snapshot = graph.snapshot();
167    let edges = graph_snapshot.edges();
168    let forward_store = edges.forward();
169    let node_count = graph_snapshot.nodes().len();
170    let snapshot = snapshot_edges(&forward_store, node_count);
171
172    let manifest_hash = compute_manifest_hash(storage.manifest_path())
173        .context("Failed to compute manifest hash for analysis identity")?;
174    let node_id_hash = compute_node_id_hash(&graph_snapshot);
175    let identity = AnalysisIdentity::new(manifest_hash, node_id_hash);
176
177    // Build all analyses
178    let phase_desc = if label_budget_config.skip_labels {
179        "CSR + SCC + Condensation (labels skipped)"
180    } else {
181        "CSR + SCC + Condensation + 2-hop labels"
182    };
183    streams.write_diagnostic(&format!("Computing analyses ({phase_desc})..."))?;
184    let start = Instant::now();
185    let analyses = if let Some(n) = threads {
186        let pool = rayon::ThreadPoolBuilder::new()
187            .num_threads(n)
188            .build()
189            .context("Failed to create rayon thread pool for analysis")?;
190        pool.install(|| GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config))
191            .context("Failed to build graph analyses")?
192    } else {
193        GraphAnalyses::build_all_with_budget(&snapshot, &label_budget_config)
194            .context("Failed to build graph analyses")?
195    };
196    let build_time = start.elapsed();
197
198    // Persist to disk
199    streams.write_diagnostic("Persisting analyses to disk...")?;
200    analyses
201        .persist_all(&storage, &identity)
202        .context("Failed to persist analyses")?;
203
204    let stats = collect_analysis_stats(&analyses, build_time);
205
206    // Output
207    if cli.json {
208        let json = serde_json::to_string_pretty(&stats).context("Failed to serialize to JSON")?;
209        streams.write_result(&json)?;
210    } else {
211        let output = format_stats_text(&stats, analysis_dir);
212        streams.write_result(&output)?;
213    }
214
215    Ok(())
216}
217
218/// Format analysis statistics as human-readable text
219fn format_stats_text(stats: &AnalysisStats, analysis_dir: &std::path::Path) -> String {
220    let mut lines = Vec::new();
221
222    lines.push("✓ Graph analysis complete".to_string());
223    lines.push(String::new());
224
225    lines.push(format!(
226        "Graph: {} nodes, {} edges",
227        stats.node_count, stats.edge_count
228    ));
229    lines.push(format!("Build time: {:.2}s", stats.build_time_secs));
230    lines.push(String::new());
231
232    lines.push("SCC Analysis:".to_string());
233    for scc_stat in &stats.scc_stats {
234        lines.push(format!(
235            "  {}: {} SCCs ({} non-trivial, max size: {})",
236            scc_stat.edge_kind,
237            scc_stat.scc_count,
238            scc_stat.non_trivial_count,
239            scc_stat.max_scc_size
240        ));
241    }
242    lines.push(String::new());
243
244    lines.push(format!(
245        "Analysis files written to: {}",
246        analysis_dir.display()
247    ));
248    lines.push("  - adjacency.csr (CSR adjacency matrix)".to_string());
249    lines.push(
250        "  - scc_calls.scc, scc_imports.scc, scc_references.scc, scc_inherits.scc".to_string(),
251    );
252    lines.push(
253        "  - cond_calls.dag, cond_imports.dag, cond_references.dag, cond_inherits.dag".to_string(),
254    );
255
256    lines.join("\n")
257}
258
#[cfg(test)]
mod tests {
    //! Unit tests for the freshness check, statistics collection, and the
    //! text report formatting of the analyze command.

    use super::*;
    use sqry_core::graph::unified::analysis::condensation::{
        CondensationDag, ReachabilityStrategy,
    };
    use sqry_core::graph::unified::analysis::csr::CsrAdjacency;
    use sqry_core::graph::unified::analysis::persistence::{
        AnalysisIdentity, persist_condensation, persist_scc,
    };
    use sqry_core::graph::unified::analysis::scc::SccData;
    use sqry_core::graph::unified::edge::EdgeKind;
    use sqry_core::graph::unified::persistence::GraphStorage;
    use std::time::Duration;

    /// Manifest content shared by the freshness tests.
    const MANIFEST_JSON: &str = r#"{"version":"1.0"}"#;

    /// A representative `Calls` edge kind for fixtures.
    fn calls_kind() -> EdgeKind {
        EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
        }
    }

    /// A representative `Imports` edge kind for fixtures.
    fn imports_kind() -> EdgeKind {
        EdgeKind::Imports {
            alias: None,
            is_wildcard: false,
        }
    }

    /// Create a minimal `SccData` for a given edge kind.
    ///
    /// `scc_count > 1` yields one non-trivial SCC of size 3; otherwise the
    /// fixture reports no non-trivial SCCs and a maximum size of 1.
    fn make_scc(edge_kind: EdgeKind, scc_count: u32) -> SccData {
        SccData {
            edge_kind,
            node_count: 10,
            scc_count,
            non_trivial_count: u32::from(scc_count > 1),
            max_scc_size: if scc_count > 1 { 3 } else { 1 },
            node_to_scc: vec![0; 10],
            scc_offsets: vec![0, 10],
            scc_members: (0..10).collect(),
            has_self_loop: vec![false],
        }
    }

    /// Create a minimal `CondensationDag` for a given edge kind.
    fn make_cond(edge_kind: EdgeKind) -> CondensationDag {
        CondensationDag {
            edge_kind,
            scc_count: 1,
            edge_count: 0,
            row_offsets: vec![0, 0],
            col_indices: vec![],
            topo_order: vec![0],
            label_out_offsets: vec![0, 0],
            label_out_data: vec![],
            label_in_offsets: vec![0, 0],
            label_in_data: vec![],
            strategy: ReachabilityStrategy::DagBfs,
        }
    }

    /// Create the four edge kinds used by analysis in canonical order.
    fn analysis_edge_kinds() -> Vec<(&'static str, EdgeKind)> {
        vec![
            ("calls", calls_kind()),
            ("imports", imports_kind()),
            ("references", EdgeKind::References),
            ("inherits", EdgeKind::Inherits),
        ]
    }

    /// Write all 8 analysis files (4 SCC + 4 condensation) whose identity
    /// records the given manifest hash.
    fn write_analysis_files(root: &std::path::Path, manifest_hash: &str) {
        let storage = GraphStorage::new(root);
        let identity = AnalysisIdentity::new(manifest_hash.to_string(), [0u8; 32]);
        std::fs::create_dir_all(storage.analysis_dir()).unwrap();

        for (kind_str, edge_kind) in analysis_edge_kinds() {
            let scc = make_scc(edge_kind.clone(), 5);
            persist_scc(&scc, &identity, &storage.analysis_scc_path(kind_str)).unwrap();

            let cond = make_cond(edge_kind);
            persist_condensation(&cond, &identity, &storage.analysis_cond_path(kind_str)).unwrap();
        }
    }

    /// Write a manifest file with the given content and return the hash that
    /// `compute_manifest_hash` reports for it.
    fn write_manifest(root: &std::path::Path, content: &str) -> String {
        let storage = GraphStorage::new(root);
        std::fs::create_dir_all(storage.graph_dir()).unwrap();
        std::fs::write(storage.manifest_path(), content).unwrap();
        compute_manifest_hash(storage.manifest_path()).unwrap()
    }

    // ========================================================================
    // has_fresh_analysis tests
    // ========================================================================

    #[test]
    fn has_fresh_analysis_false_when_no_files_exist() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // Write only a manifest, but no analysis files.
        write_manifest(root, MANIFEST_JSON);

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_no_manifest_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // No manifest at all — compute_manifest_hash should fail.
        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_true_when_all_files_match() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, MANIFEST_JSON);
        write_analysis_files(root, &hash);

        let storage = GraphStorage::new(root);
        assert!(has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_manifest_hash_mismatches() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        // The analysis files record a hash that differs from the hash of the
        // manifest on disk, so they must be treated as stale.
        write_manifest(root, MANIFEST_JSON);
        write_analysis_files(root, "stale_hash_that_wont_match");

        let storage = GraphStorage::new(root);
        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_one_scc_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, MANIFEST_JSON);
        write_analysis_files(root, &hash);

        // Remove one SCC file to simulate partial corruption.
        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_scc_path("imports")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    #[test]
    fn has_fresh_analysis_false_when_one_cond_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let hash = write_manifest(root, MANIFEST_JSON);
        write_analysis_files(root, &hash);

        // Remove one condensation file.
        let storage = GraphStorage::new(root);
        std::fs::remove_file(storage.analysis_cond_path("references")).unwrap();

        assert!(!has_fresh_analysis(&storage));
    }

    // ========================================================================
    // collect_analysis_stats tests
    // ========================================================================

    #[test]
    fn collect_analysis_stats_populated() {
        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 42,
                edge_count: 100,
                row_offsets: vec![],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: make_scc(calls_kind(), 10),
            scc_imports: make_scc(imports_kind(), 5),
            scc_references: make_scc(EdgeKind::References, 3),
            scc_inherits: make_scc(EdgeKind::Inherits, 0),
            cond_calls: make_cond(calls_kind()),
            cond_imports: make_cond(imports_kind()),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_millis(1234);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 42);
        assert_eq!(stats.edge_count, 100);
        assert_eq!(stats.scc_stats.len(), 4);

        // Verify each edge kind is represented correctly.
        assert_eq!(stats.scc_stats[0].edge_kind, "calls");
        assert_eq!(stats.scc_stats[0].scc_count, 10);
        assert_eq!(stats.scc_stats[0].non_trivial_count, 1);
        assert_eq!(stats.scc_stats[0].max_scc_size, 3);

        assert_eq!(stats.scc_stats[1].edge_kind, "imports");
        assert_eq!(stats.scc_stats[1].scc_count, 5);

        assert_eq!(stats.scc_stats[2].edge_kind, "references");
        assert_eq!(stats.scc_stats[2].scc_count, 3);

        assert_eq!(stats.scc_stats[3].edge_kind, "inherits");
        assert_eq!(stats.scc_stats[3].scc_count, 0);
        assert_eq!(stats.scc_stats[3].non_trivial_count, 0);
        assert_eq!(stats.scc_stats[3].max_scc_size, 1);

        // Build time should be faithfully captured.
        #[allow(clippy::float_cmp)]
        {
            assert_eq!(stats.build_time_secs, 1.234);
        }
    }

    #[test]
    fn collect_analysis_stats_empty_graph() {
        let empty_scc = |kind: EdgeKind| SccData {
            edge_kind: kind,
            node_count: 0,
            scc_count: 0,
            non_trivial_count: 0,
            max_scc_size: 0,
            node_to_scc: vec![],
            scc_offsets: vec![0],
            scc_members: vec![],
            has_self_loop: vec![],
        };

        let analyses = GraphAnalyses {
            adjacency: CsrAdjacency {
                node_count: 0,
                edge_count: 0,
                row_offsets: vec![0],
                col_indices: vec![],
                edge_kinds: vec![],
            },
            scc_calls: empty_scc(calls_kind()),
            scc_imports: empty_scc(imports_kind()),
            scc_references: empty_scc(EdgeKind::References),
            scc_inherits: empty_scc(EdgeKind::Inherits),
            cond_calls: make_cond(calls_kind()),
            cond_imports: make_cond(imports_kind()),
            cond_references: make_cond(EdgeKind::References),
            cond_inherits: make_cond(EdgeKind::Inherits),
        };

        let duration = Duration::from_secs(0);
        let stats = collect_analysis_stats(&analyses, duration);

        assert_eq!(stats.node_count, 0);
        assert_eq!(stats.edge_count, 0);
        // Guard against the loop below passing vacuously.
        assert_eq!(stats.scc_stats.len(), 4);
        for scc_stat in &stats.scc_stats {
            assert_eq!(scc_stat.scc_count, 0);
            assert_eq!(scc_stat.non_trivial_count, 0);
            assert_eq!(scc_stat.max_scc_size, 0);
        }
    }

    // ========================================================================
    // format_stats_text tests
    // ========================================================================

    #[test]
    fn format_stats_text_contains_expected_labels() {
        let stats = AnalysisStats {
            node_count: 10,
            edge_count: 20,
            build_time_secs: 0.5,
            scc_stats: vec![
                SccStats {
                    edge_kind: "calls".to_string(),
                    scc_count: 3,
                    non_trivial_count: 1,
                    max_scc_size: 5,
                },
                SccStats {
                    edge_kind: "imports".to_string(),
                    scc_count: 2,
                    non_trivial_count: 0,
                    max_scc_size: 1,
                },
            ],
        };

        let tmp = tempfile::tempdir().unwrap();
        let analysis_dir = tmp.path().join("analysis");
        std::fs::create_dir_all(&analysis_dir).unwrap();

        let output = format_stats_text(&stats, &analysis_dir);

        assert!(
            output.contains("Graph analysis complete"),
            "Expected completion marker: {output}"
        );
        assert!(output.contains("10 nodes"), "Expected node count: {output}");
        assert!(output.contains("20 edges"), "Expected edge count: {output}");
        assert!(output.contains("0.50s"), "Expected build time: {output}");
        assert!(
            output.contains("calls"),
            "Expected calls SCC stats: {output}"
        );
        assert!(
            output.contains("imports"),
            "Expected imports SCC stats: {output}"
        );
        assert!(output.contains("3 SCCs"), "Expected SCC count: {output}");
        assert!(
            output.contains("max size: 5"),
            "Expected max SCC size: {output}"
        );
        assert!(
            output.contains(analysis_dir.to_string_lossy().as_ref()),
            "Expected analysis dir path: {output}"
        );
    }

    #[test]
    fn format_stats_text_empty_scc_stats() {
        let stats = AnalysisStats {
            node_count: 0,
            edge_count: 0,
            build_time_secs: 0.0,
            scc_stats: vec![],
        };
        let tmp = tempfile::tempdir().unwrap();
        let output = format_stats_text(&stats, tmp.path());

        assert!(
            output.contains("Graph analysis complete"),
            "Missing header: {output}"
        );
        assert!(output.contains("0 nodes"), "Expected 0 nodes: {output}");
        assert!(output.contains("0 edges"), "Expected 0 edges: {output}");
    }
}