Skip to main content

kiss/
stats.rs

1use crate::graph::DependencyGraph;
2use crate::parsing::ParsedFile;
3use crate::py_metrics::{compute_class_metrics, compute_file_metrics, compute_function_metrics};
4use crate::rust_fn_metrics::{
5    compute_rust_file_metrics, compute_rust_function_metrics, count_non_doc_attrs, is_cfg_test_mod,
6};
7use crate::rust_parsing::ParsedRustFile;
8use rayon::prelude::*;
9use syn::{ImplItem, Item};
10use tree_sitter::Node;
11
/// Raw metric samples gathered across a code base, one vector per metric.
///
/// Function-scoped vectors get one entry per analyzed function, `methods_per_class` one
/// entry per class / `impl` block, file-scoped vectors one entry per parsed file, and
/// module-scoped vectors one entry per internal module of the dependency graph.
/// Metrics that do not apply to a language are never pushed for it, so the vectors are
/// not guaranteed to have equal lengths.
#[derive(Debug, Default)]
pub struct MetricStats {
    // --- function scope ---
    /// Statement count of each function.
    pub statements_per_function: Vec<usize>,
    /// Total argument count of each function.
    pub arguments_per_function: Vec<usize>,
    /// Positional argument count of each function (Python only).
    pub arguments_positional: Vec<usize>,
    /// Keyword-only argument count of each function (Python only).
    pub arguments_keyword_only: Vec<usize>,
    /// Maximum indentation recorded for each function.
    pub max_indentation: Vec<usize>,
    /// Nested-function depth recorded for each function.
    pub nested_function_depth: Vec<usize>,
    /// Number of returns in each function.
    pub returns_per_function: Vec<usize>,
    /// Largest number of values returned by each function (Python only).
    pub return_values_per_function: Vec<usize>,
    /// Branch count of each function.
    pub branches_per_function: Vec<usize>,
    /// Local variable count of each function.
    pub local_variables_per_function: Vec<usize>,
    /// Largest try-block statement count of each function (Python only).
    pub statements_per_try_block: Vec<usize>,
    /// Boolean parameter count of each function.
    pub boolean_parameters: Vec<usize>,
    /// Python decorators or Rust non-doc attributes attached to each function.
    pub annotations_per_function: Vec<usize>,
    /// Call count of each function.
    pub calls_per_function: Vec<usize>,

    // --- type scope ---
    /// Method count of each Python class or Rust `impl` block.
    pub methods_per_class: Vec<usize>,

    // --- file scope ---
    /// Statement count of each file.
    pub statements_per_file: Vec<usize>,
    /// Line count of each file's source text.
    pub lines_per_file: Vec<usize>,
    /// Function count of each file.
    pub functions_per_file: Vec<usize>,
    /// Interface type count of each file.
    pub interface_types_per_file: Vec<usize>,
    /// Concrete type count of each file.
    pub concrete_types_per_file: Vec<usize>,
    /// Import count of each file.
    pub imported_names_per_file: Vec<usize>,

    // --- module scope (dependency graph) ---
    /// Fan-in of each internal module.
    pub fan_in: Vec<usize>,
    /// Fan-out of each internal module.
    pub fan_out: Vec<usize>,
    /// Size of the dependency cycle each internal module belongs to, `0` if none.
    pub cycle_size: Vec<usize>,
    /// Indirect dependency count of each internal module.
    pub indirect_dependencies: Vec<usize>,
    /// Dependency depth of each internal module.
    pub dependency_depth: Vec<usize>,
}
41
42impl MetricStats {
43    pub fn collect(parsed_files: &[&ParsedFile]) -> Self {
44        // This is the hot path for `kiss stats`: per-file tree walks and per-function metric
45        // extraction. Parallelize at the file level and merge the per-thread aggregates.
46        parsed_files
47            .par_iter()
48            .map(|parsed| {
49                let mut stats = Self::default();
50                let fm = compute_file_metrics(parsed);
51                stats.statements_per_file.push(fm.statements);
52                stats.lines_per_file.push(parsed.source.lines().count());
53                stats.functions_per_file.push(fm.functions);
54                stats.interface_types_per_file.push(fm.interface_types);
55                stats.concrete_types_per_file.push(fm.concrete_types);
56                stats.imported_names_per_file.push(fm.imports);
57                collect_from_node(parsed.tree.root_node(), &parsed.source, &mut stats, false);
58                stats
59            })
60            .reduce(Self::default, |mut a, b| {
61                a.merge(b);
62                a
63            })
64    }
65
66    pub fn merge(&mut self, o: Self) {
67        macro_rules! ext { ($($f:ident),*) => { $(self.$f.extend(o.$f);)* }; }
68        ext!(
69            statements_per_function,
70            arguments_per_function,
71            arguments_positional,
72            arguments_keyword_only,
73            max_indentation,
74            nested_function_depth,
75            returns_per_function,
76            return_values_per_function,
77            branches_per_function,
78            local_variables_per_function,
79            statements_per_try_block,
80            boolean_parameters,
81            annotations_per_function,
82            calls_per_function,
83            methods_per_class,
84            statements_per_file,
85            lines_per_file,
86            functions_per_file,
87            interface_types_per_file,
88            concrete_types_per_file,
89            imported_names_per_file,
90            fan_in,
91            fan_out,
92            cycle_size,
93            indirect_dependencies,
94            dependency_depth
95        );
96    }
97
98    pub fn collect_graph_metrics(&mut self, graph: &DependencyGraph) {
99        use std::collections::HashMap;
100
101        let cycles = graph.find_cycles().cycles;
102        let mut cycle_size_by_module: HashMap<&str, usize> = HashMap::new();
103        for cycle in &cycles {
104            let size = cycle.len();
105            for m in cycle {
106                cycle_size_by_module.insert(m.as_str(), size);
107            }
108        }
109
110        // Only include internal modules (those with a known path). External imports create nodes
111        // but should not skew per-module distributions in `stats`.
112        for name in graph.paths.keys() {
113            let m = graph.module_metrics(name);
114            self.fan_in.push(m.fan_in);
115            self.fan_out.push(m.fan_out);
116            self.indirect_dependencies.push(m.indirect_dependencies);
117            self.dependency_depth.push(m.dependency_depth);
118            self.cycle_size
119                .push(*cycle_size_by_module.get(name.as_str()).unwrap_or(&0));
120        }
121    }
122
123    pub fn max_depth(&self) -> usize {
124        self.dependency_depth.iter().copied().max().unwrap_or(0)
125    }
126
127    pub fn collect_rust(parsed_files: &[&ParsedRustFile]) -> Self {
128        let mut stats = Self::default();
129        for parsed in parsed_files {
130            let fm = compute_rust_file_metrics(parsed);
131            stats.statements_per_file.push(fm.statements);
132            stats.lines_per_file.push(parsed.source.lines().count());
133            stats.functions_per_file.push(fm.functions);
134            stats.interface_types_per_file.push(fm.interface_types);
135            stats.concrete_types_per_file.push(fm.concrete_types);
136            stats.imported_names_per_file.push(fm.imports);
137            collect_rust_from_items(&parsed.ast.items, &mut stats);
138        }
139        stats
140    }
141}
142
143// inside_class tracks context for method counting; passed through recursion to nested scopes
144#[allow(clippy::only_used_in_recursion)]
145fn collect_from_node(node: Node, source: &str, stats: &mut MetricStats, inside_class: bool) {
146    match node.kind() {
147        "function_definition" | "async_function_definition" => {
148            let m = compute_function_metrics(node, source);
149            if !m.has_error {
150                push_py_fn_metrics(stats, &m);
151            }
152            let mut c = node.walk();
153            for child in node.children(&mut c) {
154                collect_from_node(child, source, stats, false);
155            }
156        }
157        "class_definition" => {
158            let m = compute_class_metrics(node);
159            stats.methods_per_class.push(m.methods);
160            let mut c = node.walk();
161            for child in node.children(&mut c) {
162                collect_from_node(child, source, stats, true);
163            }
164        }
165        _ => {
166            let mut c = node.walk();
167            for child in node.children(&mut c) {
168                collect_from_node(child, source, stats, inside_class);
169            }
170        }
171    }
172}
173
174fn push_py_fn_metrics(stats: &mut MetricStats, m: &crate::py_metrics::FunctionMetrics) {
175    stats.statements_per_function.push(m.statements);
176    stats.arguments_per_function.push(m.arguments);
177    stats.arguments_positional.push(m.arguments_positional);
178    stats.arguments_keyword_only.push(m.arguments_keyword_only);
179    stats.max_indentation.push(m.max_indentation);
180    stats.nested_function_depth.push(m.nested_function_depth);
181    stats.returns_per_function.push(m.returns);
182    stats.return_values_per_function.push(m.max_return_values);
183    stats.branches_per_function.push(m.branches);
184    stats.local_variables_per_function.push(m.local_variables);
185    stats
186        .statements_per_try_block
187        .push(m.max_try_block_statements);
188    stats.boolean_parameters.push(m.boolean_parameters);
189    stats.annotations_per_function.push(m.decorators);
190    stats.calls_per_function.push(m.calls);
191}
192
193fn push_rust_fn_metrics(stats: &mut MetricStats, m: &crate::rust_counts::RustFunctionMetrics) {
194    stats.statements_per_function.push(m.statements);
195    stats.arguments_per_function.push(m.arguments);
196    // N/A: Rust has no positional/keyword distinction; don't push to avoid skewing distributions
197    stats.max_indentation.push(m.max_indentation);
198    stats.nested_function_depth.push(m.nested_function_depth);
199    stats.returns_per_function.push(m.returns);
200    // N/A: Rust doesn't have multiple-return-value tuples in the same sense as Python
201    stats.branches_per_function.push(m.branches);
202    stats.local_variables_per_function.push(m.local_variables);
203    // N/A: try-block size is Python-only; don't push to avoid skewing distributions
204    stats.boolean_parameters.push(m.bool_parameters);
205    stats.annotations_per_function.push(m.attributes);
206    stats.calls_per_function.push(m.calls);
207}
208
209fn collect_rust_from_items(items: &[Item], stats: &mut MetricStats) {
210    for item in items {
211        match item {
212            Item::Fn(f) => push_rust_fn_metrics(
213                stats,
214                &compute_rust_function_metrics(
215                    &f.sig.inputs,
216                    &f.block,
217                    count_non_doc_attrs(&f.attrs),
218                ),
219            ),
220            Item::Impl(i) => {
221                let mcnt = i
222                    .items
223                    .iter()
224                    .filter(|ii| matches!(ii, ImplItem::Fn(_)))
225                    .count();
226                stats.methods_per_class.push(mcnt);
227                for ii in &i.items {
228                    if let ImplItem::Fn(m) = ii {
229                        push_rust_fn_metrics(
230                            stats,
231                            &compute_rust_function_metrics(
232                                &m.sig.inputs,
233                                &m.block,
234                                count_non_doc_attrs(&m.attrs),
235                            ),
236                        );
237                    }
238                }
239            }
240            Item::Mod(m) => {
241                if !is_cfg_test_mod(m)
242                    && let Some((_, items)) = &m.content
243                {
244                    collect_rust_from_items(items, stats);
245                }
246            }
247            _ => {}
248        }
249    }
250}
251
/// Returns the `p`-th percentile (`p` in percent, e.g. `90.0`) of `sorted`.
///
/// The element is picked at index `round((len - 1) * p / 100)`, clamped to the valid
/// index range, so out-of-range `p` values yield the first or last element. An empty
/// slice yields `0`.
///
/// `sorted` must already be in ascending order for the result to be a percentile; this
/// is not checked.
// The usize <-> f64 casts are intentional: the rank is rounded, floored at zero and
// clamped to the last index before it is used.
#[allow(
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss
)]
pub fn percentile(sorted: &[usize], p: f64) -> usize {
    // `checked_sub` fails exactly when the slice is empty.
    let Some(last) = sorted.len().checked_sub(1) else {
        return 0;
    };
    let rank = (last as f64 * p / 100.0).round().max(0.0) as usize;
    sorted[rank.min(last)]
}
266
/// The kind of code entity a metric is sampled for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetricScope {
    /// One sample per function or method.
    Function,
    /// One sample per type (Python class or Rust `impl` block).
    Type,
    /// One sample per source file.
    File,
    /// One sample per module of the dependency graph.
    Module,
}
274
275#[derive(Debug, Clone, Copy)]
276pub struct MetricDef {
277    pub metric_id: &'static str,
278    pub scope: MetricScope,
279}
280
281/// Central registry of all metrics with stable IDs
282pub const METRICS: &[MetricDef] = &[
283    MetricDef {
284        metric_id: "statements_per_function",
285        scope: MetricScope::Function,
286    },
287    MetricDef {
288        metric_id: "arguments_per_function",
289        scope: MetricScope::Function,
290    },
291    MetricDef {
292        metric_id: "positional_args",
293        scope: MetricScope::Function,
294    },
295    MetricDef {
296        metric_id: "keyword_only_args",
297        scope: MetricScope::Function,
298    },
299    MetricDef {
300        metric_id: "max_indentation_depth",
301        scope: MetricScope::Function,
302    },
303    MetricDef {
304        metric_id: "nested_function_depth",
305        scope: MetricScope::Function,
306    },
307    MetricDef {
308        metric_id: "returns_per_function",
309        scope: MetricScope::Function,
310    },
311    MetricDef {
312        metric_id: "return_values_per_function",
313        scope: MetricScope::Function,
314    },
315    MetricDef {
316        metric_id: "branches_per_function",
317        scope: MetricScope::Function,
318    },
319    MetricDef {
320        metric_id: "local_variables_per_function",
321        scope: MetricScope::Function,
322    },
323    MetricDef {
324        metric_id: "statements_per_try_block",
325        scope: MetricScope::Function,
326    },
327    MetricDef {
328        metric_id: "boolean_parameters",
329        scope: MetricScope::Function,
330    },
331    MetricDef {
332        metric_id: "annotations_per_function",
333        scope: MetricScope::Function,
334    },
335    MetricDef {
336        metric_id: "calls_per_function",
337        scope: MetricScope::Function,
338    },
339    MetricDef {
340        metric_id: "methods_per_class",
341        scope: MetricScope::Type,
342    },
343    MetricDef {
344        metric_id: "statements_per_file",
345        scope: MetricScope::File,
346    },
347    MetricDef {
348        metric_id: "lines_per_file",
349        scope: MetricScope::File,
350    },
351    MetricDef {
352        metric_id: "functions_per_file",
353        scope: MetricScope::File,
354    },
355    MetricDef {
356        metric_id: "interface_types_per_file",
357        scope: MetricScope::File,
358    },
359    MetricDef {
360        metric_id: "concrete_types_per_file",
361        scope: MetricScope::File,
362    },
363    MetricDef {
364        metric_id: "imported_names_per_file",
365        scope: MetricScope::File,
366    },
367    MetricDef {
368        metric_id: "fan_in",
369        scope: MetricScope::Module,
370    },
371    MetricDef {
372        metric_id: "fan_out",
373        scope: MetricScope::Module,
374    },
375    MetricDef {
376        metric_id: "cycle_size",
377        scope: MetricScope::Module,
378    },
379    MetricDef {
380        metric_id: "indirect_dependencies",
381        scope: MetricScope::Module,
382    },
383    MetricDef {
384        metric_id: "dependency_depth",
385        scope: MetricScope::Module,
386    },
387];
388
389pub fn get_metric_def(metric_id: &str) -> Option<&'static MetricDef> {
390    METRICS.iter().find(|m| m.metric_id == metric_id)
391}
392
/// Distribution summary of one metric's samples.
#[derive(Debug)]
pub struct PercentileSummary {
    /// Identifier of the summarized metric.
    pub metric_id: &'static str,
    /// Number of samples the summary is based on.
    pub count: usize,
    /// 50th percentile (median) of the samples.
    pub p50: usize,
    /// 90th percentile of the samples.
    pub p90: usize,
    /// 95th percentile of the samples.
    pub p95: usize,
    /// 99th percentile of the samples.
    pub p99: usize,
    /// Largest sample.
    pub max: usize,
}
403
404impl PercentileSummary {
405    pub fn from_values(metric_id: &'static str, values: &[usize]) -> Self {
406        if values.is_empty() {
407            return Self {
408                metric_id,
409                count: 0,
410                p50: 0,
411                p90: 0,
412                p95: 0,
413                p99: 0,
414                max: 0,
415            };
416        }
417        let mut sorted = values.to_vec();
418        sorted.sort_unstable();
419        Self {
420            metric_id,
421            count: sorted.len(),
422            p50: percentile(&sorted, 50.0),
423            p90: percentile(&sorted, 90.0),
424            p95: percentile(&sorted, 95.0),
425            p99: percentile(&sorted, 99.0),
426            max: *sorted.last().unwrap_or(&0),
427        }
428    }
429}
430
431fn metric_values<'a>(stats: &'a MetricStats, metric_id: &str) -> Option<&'a [usize]> {
432    Some(match metric_id {
433        "statements_per_function" => &stats.statements_per_function,
434        "arguments_per_function" => &stats.arguments_per_function,
435        "positional_args" => &stats.arguments_positional,
436        "keyword_only_args" => &stats.arguments_keyword_only,
437        "max_indentation_depth" => &stats.max_indentation,
438        "nested_function_depth" => &stats.nested_function_depth,
439        "returns_per_function" => &stats.returns_per_function,
440        "return_values_per_function" => &stats.return_values_per_function,
441        "branches_per_function" => &stats.branches_per_function,
442        "local_variables_per_function" => &stats.local_variables_per_function,
443        "statements_per_try_block" => &stats.statements_per_try_block,
444        "boolean_parameters" => &stats.boolean_parameters,
445        "annotations_per_function" => &stats.annotations_per_function,
446        "calls_per_function" => &stats.calls_per_function,
447        "methods_per_class" => &stats.methods_per_class,
448        "statements_per_file" => &stats.statements_per_file,
449        "lines_per_file" => &stats.lines_per_file,
450        "functions_per_file" => &stats.functions_per_file,
451        "interface_types_per_file" => &stats.interface_types_per_file,
452        "concrete_types_per_file" => &stats.concrete_types_per_file,
453        "imported_names_per_file" => &stats.imported_names_per_file,
454        "fan_in" => &stats.fan_in,
455        "fan_out" => &stats.fan_out,
456        "cycle_size" => &stats.cycle_size,
457        "indirect_dependencies" => &stats.indirect_dependencies,
458        "dependency_depth" => &stats.dependency_depth,
459        _ => return None,
460    })
461}
462
463pub fn compute_summaries(stats: &MetricStats) -> Vec<PercentileSummary> {
464    METRICS
465        .iter()
466        .filter_map(|m| {
467            let values = metric_values(stats, m.metric_id)?;
468            if values.is_empty() {
469                None
470            } else {
471                Some(PercentileSummary::from_values(m.metric_id, values))
472            }
473        })
474        .collect()
475}
476
477pub fn format_stats_table(summaries: &[PercentileSummary]) -> String {
478    use std::fmt::Write;
479    let header = format!(
480        "{:<28} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5}",
481        "metric_id", "N", "p50", "p90", "p95", "p99", "max"
482    );
483    let mut out = header;
484    out.push('\n');
485    out.push_str(&"-".repeat(out.trim_end_matches('\n').len()));
486    out.push('\n');
487    for s in summaries.iter().filter(|s| s.count > 0) {
488        let _ = writeln!(
489            out,
490            "{:<28} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5}",
491            s.metric_id, s.count, s.p50, s.p90, s.p95, s.p99, s.max
492        );
493    }
494    out
495}
496
/// Map `metric_id` to its config key, or `None` for an unknown metric.
///
/// Most metrics use their id as the config key; `positional_args` and
/// `keyword_only_args` are the ones stored under a different name.
fn config_key_for(metric_id: &str) -> Option<&'static str> {
    /// `(metric_id, config key)` pairs.
    const CONFIG_KEYS: &[(&str, &str)] = &[
        ("statements_per_function", "statements_per_function"),
        ("arguments_per_function", "arguments_per_function"),
        ("positional_args", "arguments_positional"),
        ("keyword_only_args", "arguments_keyword_only"),
        ("max_indentation_depth", "max_indentation_depth"),
        ("nested_function_depth", "nested_function_depth"),
        ("returns_per_function", "returns_per_function"),
        ("return_values_per_function", "return_values_per_function"),
        ("branches_per_function", "branches_per_function"),
        (
            "local_variables_per_function",
            "local_variables_per_function",
        ),
        ("statements_per_try_block", "statements_per_try_block"),
        ("boolean_parameters", "boolean_parameters"),
        ("annotations_per_function", "annotations_per_function"),
        ("calls_per_function", "calls_per_function"),
        ("methods_per_class", "methods_per_class"),
        ("statements_per_file", "statements_per_file"),
        ("lines_per_file", "lines_per_file"),
        ("functions_per_file", "functions_per_file"),
        ("interface_types_per_file", "interface_types_per_file"),
        ("concrete_types_per_file", "concrete_types_per_file"),
        ("imported_names_per_file", "imported_names_per_file"),
        ("fan_in", "fan_in"),
        ("fan_out", "fan_out"),
        ("cycle_size", "cycle_size"),
        ("indirect_dependencies", "indirect_dependencies"),
        ("dependency_depth", "dependency_depth"),
    ];

    CONFIG_KEYS
        .iter()
        .find(|(id, _)| *id == metric_id)
        .map(|&(_, key)| key)
}
529
530pub fn generate_config_toml(summaries: &[PercentileSummary]) -> String {
531    use std::fmt::Write;
532    let mut out = String::from(
533        "# Generated by kiss mimic\n# Thresholds based on 99th percentile\n\n[thresholds]\n",
534    );
535    for s in summaries {
536        if let Some(k) = config_key_for(s.metric_id) {
537            let _ = writeln!(out, "{k} = {}", s.p99);
538        }
539    }
540    out
541}
542
/// Unit tests for metric collection, the metric registry, percentile summaries and the
/// table/config renderers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::{create_parser, parse_file};
    use crate::rust_parsing::parse_rust_file;
    use std::io::Write;

    /// Smoke test for the small pure helpers: `percentile`, `PercentileSummary`, `merge`,
    /// `compute_summaries`, `generate_config_toml`, `config_key_for`, `format_stats_table`.
    #[test]
    fn test_stats_helpers() {
        assert_eq!(percentile(&[], 50.0), 0);
        assert_eq!(percentile(&[42], 50.0), 42);
        let s = PercentileSummary::from_values("test_id", &[]);
        assert_eq!(s.count, 0);
        let vals: Vec<usize> = (1..=100).collect();
        assert_eq!(PercentileSummary::from_values("test_id", &vals).max, 100);
        let mut a = MetricStats::default();
        a.statements_per_function.push(5);
        let mut b = MetricStats::default();
        b.statements_per_function.push(10);
        a.merge(b);
        assert_eq!(a.statements_per_function.len(), 2);
        let s2 = MetricStats {
            statements_per_function: vec![1, 2, 3],
            ..Default::default()
        };
        assert!(!compute_summaries(&s2).is_empty());
        let toml = generate_config_toml(&[PercentileSummary {
            metric_id: "statements_per_function",
            count: 10,
            p50: 5,
            p90: 9,
            p95: 10,
            p99: 15,
            max: 20,
        }]);
        assert!(toml.contains("statements_per_function = 15"));
        assert_eq!(
            config_key_for("statements_per_function"),
            Some("statements_per_function")
        );
        assert!(
            format_stats_table(&[PercentileSummary {
                metric_id: "test_id",
                count: 10,
                p50: 5,
                p90: 8,
                p95: 9,
                p99: 10,
                max: 12
            }])
            .contains("test_id")
        );
    }

    #[test]
    fn test_metric_registry() {
        assert!(get_metric_def("statements_per_function").is_some());
        assert!(get_metric_def("fan_in").is_some());
        assert!(get_metric_def("nonexistent").is_none());
        assert_eq!(
            get_metric_def("statements_per_function").unwrap().scope,
            MetricScope::Function
        );
        assert_eq!(get_metric_def("fan_in").unwrap().scope, MetricScope::Module);
        assert!(METRICS.len() > 20); // Verify we have a reasonable number of metrics
        // Test MetricDef struct fields
        let def = MetricDef {
            metric_id: "test",
            scope: MetricScope::Function,
        };
        assert_eq!(def.metric_id, "test");
    }

    #[test]
    fn test_push_py_fn_metrics() {
        let mut stats = MetricStats::default();
        let m = crate::py_metrics::FunctionMetrics {
            statements: 3,
            arguments: 2,
            has_error: false,
            ..Default::default()
        };
        push_py_fn_metrics(&mut stats, &m);
        assert_eq!(stats.statements_per_function, vec![3]);
        assert_eq!(stats.arguments_per_function, vec![2]);
    }

    /// Exercises the Rust and Python collection paths end to end on tiny inputs.
    #[test]
    fn test_collection() {
        let mut stats = MetricStats::default();
        let mut tmp_rs = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
        write!(tmp_rs, "fn foo() {{ let x = 1; }}").unwrap();
        let parsed_rs = parse_rust_file(tmp_rs.path()).unwrap();
        assert!(
            !MetricStats::collect_rust(&[&parsed_rs])
                .statements_per_file
                .is_empty()
        );
        let mut tmp_py = tempfile::NamedTempFile::with_suffix(".py").unwrap();
        write!(tmp_py, "def foo():\n    x = 1").unwrap();
        let parsed_py = parse_file(&mut create_parser().unwrap(), tmp_py.path()).unwrap();
        let mut stats2 = MetricStats::default();
        collect_from_node(
            parsed_py.tree.root_node(),
            &parsed_py.source,
            &mut stats2,
            false,
        );
        assert!(!stats2.statements_per_function.is_empty());
        let m = crate::rust_counts::RustFunctionMetrics {
            statements: 5,
            arguments: 2,
            max_indentation: 1,
            nested_function_depth: 0,
            returns: 1,
            branches: 0,
            local_variables: 2,
            bool_parameters: 0,
            attributes: 0,
            calls: 3,
        };
        push_rust_fn_metrics(&mut stats, &m);
        // The pushed values must land in the matching vectors.
        assert_eq!(stats.statements_per_function, vec![5]);
        assert_eq!(stats.arguments_per_function, vec![2]);
        assert_eq!(stats.returns_per_function, vec![1]);
        assert_eq!(stats.local_variables_per_function, vec![2]);
        assert_eq!(stats.calls_per_function, vec![3]);

        let ast: syn::File = syn::parse_str("fn bar() { let y = 2; }").unwrap();
        collect_rust_from_items(&ast.items, &mut stats);
        // One more function sample, no `impl` block, and no Python-only metrics.
        assert_eq!(stats.statements_per_function.len(), 2);
        assert_eq!(stats.calls_per_function.len(), 2);
        assert!(stats.methods_per_class.is_empty());
        assert!(stats.arguments_positional.is_empty());
        assert!(stats.arguments_keyword_only.is_empty());
        assert!(stats.return_values_per_function.is_empty());
        assert!(stats.statements_per_try_block.is_empty());
    }

    #[test]
    fn test_graph_metrics() {
        let mut stats = MetricStats::default();
        let mut graph = crate::graph::DependencyGraph::new();
        graph.add_dependency("a", "b");
        graph.add_dependency("b", "c");
        graph
            .paths
            .insert("a".into(), std::path::PathBuf::from("a.py"));
        graph
            .paths
            .insert("b".into(), std::path::PathBuf::from("b.py"));
        graph
            .paths
            .insert("c".into(), std::path::PathBuf::from("c.py"));
        stats.collect_graph_metrics(&graph);
        assert!(!stats.fan_in.is_empty());
        assert!(!stats.fan_out.is_empty());
        assert!(!stats.indirect_dependencies.is_empty());
        assert!(!stats.dependency_depth.is_empty());
        assert!(stats.max_depth() > 0);
    }

    #[test]
    fn test_graph_metrics_exclude_external_nodes_from_distributions() {
        // Regression: `stats` distributions should only include internal modules (those with paths).
        let mut stats = MetricStats::default();
        let mut graph = crate::graph::DependencyGraph::new();

        // Internal module a imports an external node "os".
        graph.get_or_create_node("a");
        graph
            .paths
            .insert("a".into(), std::path::PathBuf::from("a.py"));
        graph.add_dependency("a", "os");

        stats.collect_graph_metrics(&graph);
        assert_eq!(
            stats.fan_out.len(),
            1,
            "fan_out should only include internal modules"
        );
        assert_eq!(
            stats.fan_out[0], 1,
            "a should have one outgoing edge (to external os)"
        );
    }

    #[test]
    fn test_cycle_size_is_per_module_distribution() {
        // Regression guard: module-scoped metrics should collect one value per module.
        // `cycle_size` should behave like fan_in/fan_out: for modules in a cycle, record the size
        // of their cycle (SCC); for modules not in any cycle, record 0.
        let mut stats = MetricStats::default();
        let mut graph = crate::graph::DependencyGraph::new();

        // a <-> b forms a 2-module cycle; c is acyclic.
        graph.add_dependency("a", "b");
        graph.add_dependency("b", "a");
        graph.get_or_create_node("c");
        graph
            .paths
            .insert("a".into(), std::path::PathBuf::from("a.py"));
        graph
            .paths
            .insert("b".into(), std::path::PathBuf::from("b.py"));
        graph
            .paths
            .insert("c".into(), std::path::PathBuf::from("c.py"));

        stats.collect_graph_metrics(&graph);

        assert_eq!(
            stats.cycle_size.len(),
            graph.paths.len(),
            "Expected cycle_size to have one entry per internal module (got {:?} for {} modules)",
            stats.cycle_size,
            graph.paths.len()
        );

        let mut got = stats.cycle_size.clone();
        got.sort_unstable();
        assert_eq!(
            got,
            vec![0, 2, 2],
            "Expected modules in the cycle to record 2, and the acyclic module to record 0"
        );
    }

    // === Bug-hunting tests ===

    #[test]
    fn test_generate_config_toml_includes_boolean_parameters() {
        // generate_config_toml should include newer metrics like boolean_parameters
        let summaries = vec![PercentileSummary {
            metric_id: "boolean_parameters",
            count: 10,
            p50: 0,
            p90: 1,
            p95: 1,
            p99: 2,
            max: 3,
        }];
        let toml = generate_config_toml(&summaries);
        assert!(
            toml.contains("boolean_parameters"),
            "Generated config should include boolean_parameters threshold"
        );
    }

    #[test]
    fn test_metric_values() {
        let mut stats = MetricStats::default();
        stats.statements_per_function.push(10);
        stats.arguments_per_function.push(3);
        stats.fan_in.push(2);
        assert_eq!(
            super::metric_values(&stats, "statements_per_function"),
            Some(&[10][..])
        );
        assert_eq!(
            super::metric_values(&stats, "arguments_per_function"),
            Some(&[3][..])
        );
        assert_eq!(super::metric_values(&stats, "fan_in"), Some(&[2][..]));
        assert_eq!(super::metric_values(&stats, "unknown_metric"), None);
    }
}