Skip to main content

normalize_native_rules/
high_complexity.rs

//! `high-complexity` native rule — flags functions whose cyclomatic complexity
//! meets or exceeds a threshold.
//!
//! Uses tree-sitter tags queries to identify functions and complexity queries
//! (or the `compute_complexity` fallback) to measure cyclomatic complexity.
//!
//! # Configuration
//!
//! The threshold is configurable via `.normalize/config.toml`:
//!
//! ```toml
//! [rules.rule."high-complexity"]
//! threshold = 10   # default: 20
//! ```
14
15use normalize_facts::extract::compute_complexity;
16use normalize_languages::parsers::{grammar_loader, parse_with_grammar};
17use normalize_languages::support_for_path;
18use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
19use std::path::Path;
20use streaming_iterator::StreamingIterator;
21
22use crate::cache::{FileRule, run_file_rule};
23use normalize_rules_config::WalkConfig;
24
25/// Serializable per-file finding for the high-complexity rule.
26#[derive(serde::Serialize, serde::Deserialize)]
27pub struct HighComplexityFinding {
28    rel_path: String,
29    name: String,
30    start_line: usize,
31    complexity: usize,
32}
33
/// Rule that flags functions whose cyclomatic complexity meets or exceeds a
/// threshold.
#[derive(Debug, Clone)]
pub struct HighComplexityRule {
    /// Inclusive lower bound: a function is reported when its complexity is
    /// greater than or equal to this value.
    pub threshold: usize,
}
38
39impl FileRule for HighComplexityRule {
40    type Finding = HighComplexityFinding;
41
42    fn engine_name(&self) -> &str {
43        "high-complexity"
44    }
45
46    fn config_hash(&self) -> String {
47        self.threshold.to_string()
48    }
49
50    fn check_file(&self, path: &Path, root: &Path) -> Vec<Self::Finding> {
51        let support = match support_for_path(path) {
52            Some(s) => s,
53            None => return Vec::new(),
54        };
55        let content = match std::fs::read_to_string(path) {
56            Ok(c) => c,
57            Err(_) => return Vec::new(),
58        };
59
60        let grammar_name = support.grammar_name();
61        let tree = match parse_with_grammar(grammar_name, &content) {
62            Some(t) => t,
63            None => return Vec::new(),
64        };
65
66        let loader = grammar_loader();
67        let tags_scm = match loader.get_tags(grammar_name) {
68            Some(t) => t,
69            None => return Vec::new(),
70        };
71        let ts_lang = match loader.get(grammar_name) {
72            Ok(l) => l,
73            Err(_) => return Vec::new(),
74        };
75        let tags_query = match tree_sitter::Query::new(&ts_lang, &tags_scm) {
76            Ok(q) => q,
77            Err(_) => return Vec::new(),
78        };
79
80        let complexity_query = loader.get_complexity(grammar_name).and_then(|scm| {
81            let grammar = loader.get(grammar_name).ok()?;
82            tree_sitter::Query::new(&grammar, &scm).ok()
83        });
84
85        let capture_names = tags_query.capture_names();
86        let root_node = tree.root_node();
87        let mut qcursor = tree_sitter::QueryCursor::new();
88        let mut matches = qcursor.matches(&tags_query, root_node, content.as_bytes());
89
90        let rel_path = path
91            .strip_prefix(root)
92            .unwrap_or(path)
93            .to_string_lossy()
94            .to_string();
95
96        let mut results = Vec::new();
97
98        while let Some(m) = matches.next() {
99            for capture in m.captures {
100                let cn = capture_names[capture.index as usize];
101                if !matches!(cn, "definition.function" | "definition.method") {
102                    continue;
103                }
104
105                let node = capture.node;
106                let name = match support.node_name(&node, &content) {
107                    Some(n) => n.to_string(),
108                    None => continue,
109                };
110
111                let complexity = if let Some(ref cq) = complexity_query {
112                    count_complexity_with_query(&node, cq, &content)
113                } else {
114                    compute_complexity(&node, support, content.as_bytes())
115                };
116
117                if complexity >= self.threshold {
118                    let start_line = node.start_position().row + 1;
119                    results.push(HighComplexityFinding {
120                        rel_path: rel_path.clone(),
121                        name,
122                        start_line,
123                        complexity,
124                    });
125                }
126            }
127        }
128
129        results
130    }
131
132    fn to_diagnostics(
133        &self,
134        findings: Vec<(std::path::PathBuf, Vec<Self::Finding>)>,
135        _root: &Path,
136        files_checked: usize,
137    ) -> DiagnosticsReport {
138        let threshold = self.threshold;
139
140        let mut issues: Vec<Issue> = findings
141            .into_iter()
142            .flat_map(|(_path, file_findings)| file_findings)
143            .map(|f| Issue {
144                file: f.rel_path,
145                line: Some(f.start_line),
146                column: None,
147                end_line: None,
148                end_column: None,
149                rule_id: "high-complexity".into(),
150                message: format!(
151                    "function `{}` has cyclomatic complexity {} (threshold: {threshold})",
152                    f.name, f.complexity
153                ),
154                severity: Severity::Warning,
155                source: "high-complexity".into(),
156                related: vec![],
157                suggestion: Some(
158                    "consider extracting helper functions to reduce complexity".into(),
159                ),
160            })
161            .collect();
162
163        // Sort by complexity descending.
164        issues.sort_by(|a, b| {
165            let extract = |msg: &str| -> usize {
166                msg.rsplit("complexity ")
167                    .next()
168                    .and_then(|s| s.split(' ').next())
169                    .and_then(|s| s.parse().ok())
170                    .unwrap_or(0)
171            };
172            extract(&b.message).cmp(&extract(&a.message))
173        });
174
175        DiagnosticsReport {
176            issues,
177            files_checked,
178            sources_run: vec!["high-complexity".into()],
179            tool_errors: vec![],
180            daemon_cached: false,
181        }
182    }
183}
184
185/// Count complexity using a tree-sitter query with `@complexity` captures.
186fn count_complexity_with_query(
187    node: &tree_sitter::Node,
188    query: &tree_sitter::Query,
189    content: &str,
190) -> usize {
191    let complexity_idx = query
192        .capture_names()
193        .iter()
194        .position(|n| *n == "complexity");
195
196    let Some(complexity_idx) = complexity_idx else {
197        return 1;
198    };
199
200    let mut qcursor = tree_sitter::QueryCursor::new();
201    qcursor.set_byte_range(node.byte_range());
202
203    let mut complexity = 1usize;
204    let mut matches = qcursor.matches(query, *node, content.as_bytes());
205    while let Some(m) = matches.next() {
206        for capture in m.captures {
207            if capture.index as usize == complexity_idx {
208                complexity += 1;
209            }
210        }
211    }
212    complexity
213}
214
215/// Build a `DiagnosticsReport` for the `high-complexity` rule.
216///
217/// Walks all source files under `root`, parses each with tree-sitter, and emits
218/// an issue for every function whose cyclomatic complexity meets or exceeds the
219/// threshold.
220pub fn build_high_complexity_report(
221    root: &Path,
222    threshold: usize,
223    explicit_files: Option<&[std::path::PathBuf]>,
224    walk_config: &WalkConfig,
225) -> DiagnosticsReport {
226    let rule = HighComplexityRule { threshold };
227    run_file_rule(&rule, root, explicit_files, walk_config)
228}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Create `name` inside `dir` containing a Python function with
    /// `branch_count` sequential `if` statements, which the tests below treat
    /// as cyclomatic complexity `branch_count + 1`.
    fn make_python_function_with_branches(
        dir: &std::path::Path,
        name: &str,
        branch_count: usize,
    ) -> std::path::PathBuf {
        let mut source = String::from("def complex_function(x):\n");
        for i in 0..branch_count {
            source.push_str(&format!("    if x == {i}:\n        return {i}\n"));
        }
        source.push_str("    return -1\n");

        let target = dir.join(name);
        std::fs::write(&target, source).unwrap();
        target
    }

    /// Generate a fixture file in a fresh temporary directory and run the rule
    /// on it with the given `threshold`.
    fn findings_for(
        file_name: &str,
        branch_count: usize,
        threshold: usize,
    ) -> Vec<HighComplexityFinding> {
        let workspace = tempfile::tempdir().unwrap();
        let file = make_python_function_with_branches(workspace.path(), file_name, branch_count);
        HighComplexityRule { threshold }.check_file(&file, workspace.path())
    }

    #[test]
    fn test_default_threshold_not_triggered() {
        // 18 branches → complexity 19, one below the default threshold of 20.
        let findings = findings_for("low.py", 18, 20);
        assert!(
            findings.is_empty(),
            "complexity 19 should not trigger default threshold of 20; got {} findings",
            findings.len()
        );
    }

    #[test]
    fn test_default_threshold_triggered() {
        // 19 branches → complexity 20 (base 1 + 19); the comparison is inclusive.
        let findings = findings_for("high.py", 19, 20);
        assert!(
            !findings.is_empty(),
            "complexity 20 should trigger default threshold of 20"
        );
    }

    #[test]
    fn test_custom_threshold_lower() {
        // 5 branches → complexity 6: under the default of 20, over a custom 5.
        let findings = findings_for("medium.py", 5, 5);
        assert!(
            !findings.is_empty(),
            "complexity 6 should trigger custom threshold of 5"
        );
    }

    #[test]
    fn test_custom_threshold_higher() {
        // 19 branches → complexity 20: hits the default of 20, under a custom 30.
        let findings = findings_for("medium.py", 19, 30);
        assert!(
            findings.is_empty(),
            "complexity 20 should not trigger custom threshold of 30"
        );
    }
}