1use normalize_languages::parsers::{grammar_loader, parse_with_grammar};
16use normalize_languages::support_for_path;
17use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
18use std::path::Path;
19use streaming_iterator::StreamingIterator;
20
21use crate::cache::{FileRule, run_file_rule};
22use normalize_rules_config::WalkConfig;
23
24#[derive(serde::Serialize, serde::Deserialize)]
26pub struct LongFunctionFinding {
27 rel_path: String,
28 name: String,
29 start_line: usize,
30 line_count: usize,
31}
32
/// Rule that reports functions and methods spanning too many lines.
#[derive(Debug, Clone)]
pub struct LongFunctionRule {
    /// Minimum number of lines (inclusive) at which a function is reported.
    pub threshold: usize,
}
37
38impl FileRule for LongFunctionRule {
39 type Finding = LongFunctionFinding;
40
41 fn engine_name(&self) -> &str {
42 "long-function"
43 }
44
45 fn config_hash(&self) -> String {
46 self.threshold.to_string()
47 }
48
49 fn check_file(&self, path: &Path, root: &Path) -> Vec<Self::Finding> {
50 let support = match support_for_path(path) {
51 Some(s) => s,
52 None => return Vec::new(),
53 };
54 let content = match std::fs::read_to_string(path) {
55 Ok(c) => c,
56 Err(_) => return Vec::new(),
57 };
58
59 let grammar_name = support.grammar_name();
60 let tree = match parse_with_grammar(grammar_name, &content) {
61 Some(t) => t,
62 None => return Vec::new(),
63 };
64
65 let loader = grammar_loader();
66 let tags_scm = match loader.get_tags(grammar_name) {
67 Some(t) => t,
68 None => return Vec::new(),
69 };
70 let ts_lang = match loader.get(grammar_name) {
71 Ok(l) => l,
72 Err(_) => return Vec::new(),
73 };
74 let tags_query = match tree_sitter::Query::new(&ts_lang, &tags_scm) {
75 Ok(q) => q,
76 Err(_) => return Vec::new(),
77 };
78
79 let capture_names = tags_query.capture_names();
80 let root_node = tree.root_node();
81 let mut qcursor = tree_sitter::QueryCursor::new();
82 let mut matches = qcursor.matches(&tags_query, root_node, content.as_bytes());
83
84 let rel_path = path
85 .strip_prefix(root)
86 .unwrap_or(path)
87 .to_string_lossy()
88 .to_string();
89
90 let mut results = Vec::new();
91
92 while let Some(m) = matches.next() {
93 for capture in m.captures {
94 let cn = capture_names[capture.index as usize];
95 if !matches!(cn, "definition.function" | "definition.method") {
96 continue;
97 }
98
99 let node = capture.node;
100 let name = match support.node_name(&node, &content) {
101 Some(n) => n.to_string(),
102 None => continue,
103 };
104
105 let start_line = node.start_position().row + 1;
106 let end_line = node.end_position().row + 1;
107 let line_count = end_line.saturating_sub(start_line) + 1;
108
109 if line_count >= self.threshold {
110 results.push(LongFunctionFinding {
111 rel_path: rel_path.clone(),
112 name,
113 start_line,
114 line_count,
115 });
116 }
117 }
118 }
119
120 results
121 }
122
123 fn to_diagnostics(
124 &self,
125 findings: Vec<(std::path::PathBuf, Vec<Self::Finding>)>,
126 _root: &Path,
127 files_checked: usize,
128 ) -> DiagnosticsReport {
129 let threshold = self.threshold;
130
131 let mut issues: Vec<Issue> = findings
132 .into_iter()
133 .flat_map(|(_path, file_findings)| file_findings)
134 .map(|f| Issue {
135 file: f.rel_path,
136 line: Some(f.start_line),
137 column: None,
138 end_line: None,
139 end_column: None,
140 rule_id: "long-function".into(),
141 message: format!(
142 "function `{}` is {} lines (threshold: {threshold})",
143 f.name, f.line_count
144 ),
145 severity: Severity::Warning,
146 source: "long-function".into(),
147 related: vec![],
148 suggestion: Some(
149 "consider breaking this function into smaller, focused functions".into(),
150 ),
151 })
152 .collect();
153
154 issues.sort_by(|a, b| {
156 let extract = |msg: &str| -> usize {
157 msg.split(" is ")
158 .nth(1)
159 .and_then(|s| s.split(' ').next())
160 .and_then(|s| s.parse().ok())
161 .unwrap_or(0)
162 };
163 extract(&b.message).cmp(&extract(&a.message))
164 });
165
166 DiagnosticsReport {
167 issues,
168 files_checked,
169 sources_run: vec!["long-function".into()],
170 tool_errors: vec![],
171 daemon_cached: false,
172 }
173 }
174}
175
176pub fn build_long_function_report(
181 root: &Path,
182 threshold: usize,
183 explicit_files: Option<&[std::path::PathBuf]>,
184 walk_config: &WalkConfig,
185) -> DiagnosticsReport {
186 let rule = LongFunctionRule { threshold };
187 run_file_rule(&rule, root, explicit_files, walk_config)
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as _;

    /// Creates the file `name` inside `dir` holding a single Python function:
    /// one `def` line followed by `body_lines` assignment statements, so the
    /// function spans `body_lines + 1` lines in total. Returns the file path.
    fn make_python_function(
        dir: &std::path::Path,
        name: &str,
        body_lines: usize,
    ) -> std::path::PathBuf {
        let path = dir.join(name);
        let mut f = std::fs::File::create(&path).unwrap();
        writeln!(f, "def long_function():").unwrap();
        for i in 0..body_lines {
            writeln!(f, " x = {i}").unwrap();
        }
        path
    }

    #[test]
    fn test_default_threshold_not_triggered() {
        let dir = tempfile::tempdir().unwrap();
        // 1 `def` line + 98 body lines = 99 lines, one short of the threshold.
        let path = make_python_function(dir.path(), "short.py", 98);
        let rule = LongFunctionRule { threshold: 100 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            findings.is_empty(),
            "99-line function should not trigger default threshold of 100"
        );
    }

    #[test]
    fn test_default_threshold_triggered() {
        let dir = tempfile::tempdir().unwrap();
        // 1 `def` line + 99 body lines = exactly 100 lines. Together with the
        // 99-line case above this pins the threshold as inclusive.
        let path = make_python_function(dir.path(), "long.py", 99);
        let rule = LongFunctionRule { threshold: 100 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            !findings.is_empty(),
            "100-line function should trigger threshold of 100"
        );
    }

    #[test]
    fn test_custom_threshold_lower() {
        let dir = tempfile::tempdir().unwrap();
        // 1 `def` line + 29 body lines = 30 lines.
        let path = make_python_function(dir.path(), "medium.py", 29);
        let rule = LongFunctionRule { threshold: 20 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            !findings.is_empty(),
            "30-line function should trigger custom threshold of 20"
        );
    }

    #[test]
    fn test_custom_threshold_higher() {
        let dir = tempfile::tempdir().unwrap();
        // 1 `def` line + 99 body lines = 100 lines.
        let path = make_python_function(dir.path(), "medium.py", 99);
        let rule = LongFunctionRule { threshold: 200 };
        let findings = rule.check_file(&path, dir.path());
        assert!(
            findings.is_empty(),
            "100-line function should not trigger custom threshold of 200"
        );
    }
}