Skip to main content

normalize_native_rules/
long_function.rs

//! `long-function` native rule — flags functions whose line count meets or exceeds a threshold.
2//!
3//! Uses tree-sitter tags queries to identify function boundaries and measures
4//! line span (end_line - start_line + 1).
5//!
6//! # Configuration
7//!
8//! The threshold is configurable via `.normalize/config.toml`:
9//!
10//! ```toml
11//! [rules.rule."long-function"]
12//! threshold = 50   # default: 100
13//! ```
14
15use normalize_languages::parsers::{grammar_loader, parse_with_grammar};
16use normalize_languages::support_for_path;
17use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
18use std::path::Path;
19use streaming_iterator::StreamingIterator;
20
21use crate::cache::{FileRule, run_file_rule};
22use normalize_rules_config::WalkConfig;
23
24/// Serializable per-file finding for the long-function rule.
25#[derive(serde::Serialize, serde::Deserialize)]
26pub struct LongFunctionFinding {
27    rel_path: String,
28    name: String,
29    start_line: usize,
30    line_count: usize,
31}
32
/// Rule that flags functions whose line span meets or exceeds a threshold.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LongFunctionRule {
    /// Minimum line span (inclusive) at which a function is reported.
    pub threshold: usize,
}
37
38impl FileRule for LongFunctionRule {
39    type Finding = LongFunctionFinding;
40
41    fn engine_name(&self) -> &str {
42        "long-function"
43    }
44
45    fn config_hash(&self) -> String {
46        self.threshold.to_string()
47    }
48
49    fn check_file(&self, path: &Path, root: &Path) -> Vec<Self::Finding> {
50        let support = match support_for_path(path) {
51            Some(s) => s,
52            None => return Vec::new(),
53        };
54        let content = match std::fs::read_to_string(path) {
55            Ok(c) => c,
56            Err(_) => return Vec::new(),
57        };
58
59        let grammar_name = support.grammar_name();
60        let tree = match parse_with_grammar(grammar_name, &content) {
61            Some(t) => t,
62            None => return Vec::new(),
63        };
64
65        let loader = grammar_loader();
66        let tags_scm = match loader.get_tags(grammar_name) {
67            Some(t) => t,
68            None => return Vec::new(),
69        };
70        let ts_lang = match loader.get(grammar_name) {
71            Ok(l) => l,
72            Err(_) => return Vec::new(),
73        };
74        let tags_query = match tree_sitter::Query::new(&ts_lang, &tags_scm) {
75            Ok(q) => q,
76            Err(_) => return Vec::new(),
77        };
78
79        let capture_names = tags_query.capture_names();
80        let root_node = tree.root_node();
81        let mut qcursor = tree_sitter::QueryCursor::new();
82        let mut matches = qcursor.matches(&tags_query, root_node, content.as_bytes());
83
84        let rel_path = path
85            .strip_prefix(root)
86            .unwrap_or(path)
87            .to_string_lossy()
88            .to_string();
89
90        let mut results = Vec::new();
91
92        while let Some(m) = matches.next() {
93            for capture in m.captures {
94                let cn = capture_names[capture.index as usize];
95                if !matches!(cn, "definition.function" | "definition.method") {
96                    continue;
97                }
98
99                let node = capture.node;
100                let name = match support.node_name(&node, &content) {
101                    Some(n) => n.to_string(),
102                    None => continue,
103                };
104
105                let start_line = node.start_position().row + 1;
106                let end_line = node.end_position().row + 1;
107                let line_count = end_line.saturating_sub(start_line) + 1;
108
109                if line_count >= self.threshold {
110                    results.push(LongFunctionFinding {
111                        rel_path: rel_path.clone(),
112                        name,
113                        start_line,
114                        line_count,
115                    });
116                }
117            }
118        }
119
120        results
121    }
122
123    fn to_diagnostics(
124        &self,
125        findings: Vec<(std::path::PathBuf, Vec<Self::Finding>)>,
126        _root: &Path,
127        files_checked: usize,
128    ) -> DiagnosticsReport {
129        let threshold = self.threshold;
130
131        let mut issues: Vec<Issue> = findings
132            .into_iter()
133            .flat_map(|(_path, file_findings)| file_findings)
134            .map(|f| Issue {
135                file: f.rel_path,
136                line: Some(f.start_line),
137                column: None,
138                end_line: None,
139                end_column: None,
140                rule_id: "long-function".into(),
141                message: format!(
142                    "function `{}` is {} lines (threshold: {threshold})",
143                    f.name, f.line_count
144                ),
145                severity: Severity::Warning,
146                source: "long-function".into(),
147                related: vec![],
148                suggestion: Some(
149                    "consider breaking this function into smaller, focused functions".into(),
150                ),
151            })
152            .collect();
153
154        // Sort by line count descending.
155        issues.sort_by(|a, b| {
156            let extract = |msg: &str| -> usize {
157                msg.split(" is ")
158                    .nth(1)
159                    .and_then(|s| s.split(' ').next())
160                    .and_then(|s| s.parse().ok())
161                    .unwrap_or(0)
162            };
163            extract(&b.message).cmp(&extract(&a.message))
164        });
165
166        DiagnosticsReport {
167            issues,
168            files_checked,
169            sources_run: vec!["long-function".into()],
170            tool_errors: vec![],
171            daemon_cached: false,
172        }
173    }
174}
175
176/// Build a `DiagnosticsReport` for the `long-function` rule.
177///
178/// Walks all source files under `root`, parses each with tree-sitter, and emits
179/// an issue for every function whose line span meets or exceeds the threshold.
180pub fn build_long_function_report(
181    root: &Path,
182    threshold: usize,
183    explicit_files: Option<&[std::path::PathBuf]>,
184    walk_config: &WalkConfig,
185) -> DiagnosticsReport {
186    let rule = LongFunctionRule { threshold };
187    run_file_rule(&rule, root, explicit_files, walk_config)
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as _;

    /// Write a Python file holding a single function: one `def` line followed
    /// by `body_lines` one-statement body lines, so the function spans
    /// `body_lines + 1` lines in total.
    fn make_python_function(
        dir: &std::path::Path,
        name: &str,
        body_lines: usize,
    ) -> std::path::PathBuf {
        let path = dir.join(name);
        let mut f = std::fs::File::create(&path).unwrap();
        writeln!(f, "def long_function():").unwrap();
        for i in 0..body_lines {
            writeln!(f, "    x = {i}").unwrap();
        }
        path
    }

    #[test]
    fn test_default_threshold_not_triggered() {
        let dir = tempfile::tempdir().unwrap();
        // `def` line + 98-line body → 99-line span, one short of the threshold.
        let path = make_python_function(dir.path(), "short.py", 98);
        let rule = LongFunctionRule { threshold: 100 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            findings.is_empty(),
            "99-line function should not trigger default threshold of 100"
        );
    }

    #[test]
    fn test_default_threshold_triggered() {
        let dir = tempfile::tempdir().unwrap();
        // `def` line + 99-line body → span of exactly 100, which pins the
        // inclusive (`>=`) boundary rather than a value above it.
        let path = make_python_function(dir.path(), "long.py", 99);
        let rule = LongFunctionRule { threshold: 100 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            !findings.is_empty(),
            "100-line function should trigger threshold of 100"
        );
        assert_eq!(
            findings[0].line_count, 100,
            "span should count the `def` line plus the 99 body lines"
        );
    }

    #[test]
    fn test_custom_threshold_lower() {
        let dir = tempfile::tempdir().unwrap();
        // 30-line body (31-line span) — below default (100) but above the
        // custom threshold of 20.
        let path = make_python_function(dir.path(), "medium.py", 30);
        let rule = LongFunctionRule { threshold: 20 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            !findings.is_empty(),
            "30-line function should trigger custom threshold of 20"
        );
    }

    #[test]
    fn test_custom_threshold_higher() {
        let dir = tempfile::tempdir().unwrap();
        // 100-line body (101-line span) — over the default (100) but below the
        // custom threshold of 200.
        let path = make_python_function(dir.path(), "medium.py", 100);
        let rule = LongFunctionRule { threshold: 200 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            findings.is_empty(),
            "100-line function should not trigger custom threshold of 200"
        );
    }
}