1use crate::metrics::types::{RepoAnalysis, SymbolFact};
5use anyhow::Result;
6use std::path::Path;
7use tree_sitter::{Language, Node, Parser};
8
9pub trait CodeAnalyzer {
10 fn analyze(&self, rel_path: &str, source: &str) -> Result<RepoAnalysis>;
11}
12
13pub fn analyzer_for(path: &Path) -> Box<dyn CodeAnalyzer> {
14 language_spec(path)
15 .map(|spec| Box::new(TreeSitterAnalyzer { spec }) as Box<dyn CodeAnalyzer>)
16 .unwrap_or_else(|| Box::new(GenericAnalyzer))
17}
18
19pub struct GenericAnalyzer;
20
21impl CodeAnalyzer for GenericAnalyzer {
22 fn analyze(&self, rel_path: &str, source: &str) -> Result<RepoAnalysis> {
23 let lines: Vec<&str> = source.lines().collect();
24 Ok(RepoAnalysis {
25 path: rel_path.into(),
26 language: language_name(Path::new(rel_path)).into(),
27 bytes: source.len() as u64,
28 loc: lines.len() as u32,
29 sloc: lines.iter().filter(|line| !line.trim().is_empty()).count() as u32,
30 complexity_total: 0,
31 max_fn_complexity: 0,
32 imports: vec![],
33 symbols: vec![],
34 })
35 }
36}
37
38struct TreeSitterAnalyzer {
39 spec: &'static LanguageSpec,
40}
41
42impl CodeAnalyzer for TreeSitterAnalyzer {
43 fn analyze(&self, rel_path: &str, source: &str) -> Result<RepoAnalysis> {
44 let mut parser = Parser::new();
45 parser.set_language(&(self.spec.language)())?;
46 let tree = parser.parse(source, None).expect("tree-sitter parse");
47 let root = tree.root_node();
48 let bytes = source.as_bytes();
49 let mut symbols = vec![];
50 collect_symbols(root, bytes, self.spec, &mut symbols);
51 let imports = collect_kind_text(root, bytes, self.spec.import_kinds)
52 .into_iter()
53 .flat_map(|raw| extract_import_targets(&raw))
54 .collect::<Vec<_>>();
55 let symbol_ranges = symbols
56 .iter()
57 .map(|s| (s.start_byte, s.end_byte))
58 .collect::<Vec<_>>();
59 let top_level = count_top_level_complexity(root, bytes, self.spec, &symbol_ranges);
60 let sum_symbols = symbols.iter().map(|s| s.complexity).sum::<u32>();
61 Ok(RepoAnalysis {
62 path: rel_path.into(),
63 language: self.spec.name.into(),
64 bytes: source.len() as u64,
65 loc: source.lines().count() as u32,
66 sloc: source
67 .lines()
68 .filter(|line| !line.trim().is_empty())
69 .count() as u32,
70 complexity_total: sum_symbols + top_level,
71 max_fn_complexity: symbols.iter().map(|s| s.complexity).max().unwrap_or(0),
72 imports,
73 symbols,
74 })
75 }
76}
77
78struct LanguageSpec {
79 name: &'static str,
80 language: fn() -> Language,
81 symbol_kinds: &'static [&'static str],
82 import_kinds: &'static [&'static str],
83 call_kinds: &'static [&'static str],
84 branch_kinds: &'static [&'static str],
85}
86
87fn language_spec(path: &Path) -> Option<&'static LanguageSpec> {
88 static SPECS: &[LanguageSpec] = &[
89 LanguageSpec {
90 name: "rust",
91 language: || tree_sitter_rust::LANGUAGE.into(),
92 symbol_kinds: &[
93 "function_item",
94 "struct_item",
95 "enum_item",
96 "impl_item",
97 "trait_item",
98 ],
99 import_kinds: &["use_declaration"],
100 call_kinds: &["call_expression", "macro_invocation"],
101 branch_kinds: &[
102 "if_expression",
103 "for_expression",
104 "loop_expression",
105 "while_expression",
106 "match_expression",
107 ],
108 },
109 LanguageSpec {
110 name: "typescript",
111 language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
112 symbol_kinds: &[
113 "function_declaration",
114 "method_definition",
115 "class_declaration",
116 "interface_declaration",
117 ],
118 import_kinds: &["import_statement"],
119 call_kinds: &["call_expression"],
120 branch_kinds: &[
121 "if_statement",
122 "for_statement",
123 "for_in_statement",
124 "while_statement",
125 "switch_statement",
126 "ternary_expression",
127 "logical_expression",
128 ],
129 },
130 LanguageSpec {
131 name: "javascript",
132 language: || tree_sitter_javascript::LANGUAGE.into(),
133 symbol_kinds: &[
134 "function_declaration",
135 "method_definition",
136 "class_declaration",
137 ],
138 import_kinds: &["import_statement"],
139 call_kinds: &["call_expression"],
140 branch_kinds: &[
141 "if_statement",
142 "for_statement",
143 "for_in_statement",
144 "while_statement",
145 "switch_statement",
146 "ternary_expression",
147 "logical_expression",
148 ],
149 },
150 LanguageSpec {
151 name: "python",
152 language: || tree_sitter_python::LANGUAGE.into(),
153 symbol_kinds: &["function_definition", "class_definition"],
154 import_kinds: &["import_statement", "import_from_statement"],
155 call_kinds: &["call"],
156 branch_kinds: &[
157 "if_statement",
158 "for_statement",
159 "while_statement",
160 "conditional_expression",
161 ],
162 },
163 LanguageSpec {
164 name: "go",
165 language: || tree_sitter_go::LANGUAGE.into(),
166 symbol_kinds: &[
167 "function_declaration",
168 "method_declaration",
169 "type_declaration",
170 ],
171 import_kinds: &["import_declaration"],
172 call_kinds: &["call_expression"],
173 branch_kinds: &[
174 "if_statement",
175 "for_statement",
176 "expression_switch_statement",
177 "type_switch_statement",
178 "select_statement",
179 ],
180 },
181 LanguageSpec {
182 name: "java",
183 language: || tree_sitter_java::LANGUAGE.into(),
184 symbol_kinds: &[
185 "method_declaration",
186 "class_declaration",
187 "interface_declaration",
188 "enum_declaration",
189 ],
190 import_kinds: &["import_declaration"],
191 call_kinds: &["method_invocation"],
192 branch_kinds: &[
193 "if_statement",
194 "for_statement",
195 "enhanced_for_statement",
196 "while_statement",
197 "switch_expression",
198 "switch_block",
199 ],
200 },
201 ];
202 let name = language_name(path);
203 SPECS.iter().find(|spec| spec.name == name)
204}
205
/// Maps the extension of `path` to a language label.
///
/// The comparison is case-sensitive. Paths without an extension, with a
/// non-UTF-8 extension, or with an unrecognized one yield `"generic"`.
fn language_name(path: &Path) -> &'static str {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return "generic";
    };
    match extension {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "py" => "python",
        "go" => "go",
        "java" => "java",
        _ => "generic",
    }
}
217
218fn collect_symbols(node: Node<'_>, source: &[u8], spec: &LanguageSpec, out: &mut Vec<SymbolFact>) {
219 if spec.symbol_kinds.iter().any(|kind| *kind == node.kind()) {
220 let calls = collect_kind_text(node, source, spec.call_kinds)
221 .into_iter()
222 .filter_map(|raw| call_name(&raw))
223 .collect::<Vec<_>>();
224 out.push(SymbolFact {
225 path: String::new(),
226 name: symbol_name(node, source),
227 kind: node.kind().into(),
228 complexity: 1 + count_complexity(node, spec),
229 calls,
230 start_byte: node.start_byte(),
231 end_byte: node.end_byte(),
232 });
233 }
234 let mut cursor = node.walk();
235 for child in node.children(&mut cursor) {
236 collect_symbols(child, source, spec, out);
237 }
238}
239
240fn symbol_name(node: Node<'_>, source: &[u8]) -> String {
241 let mut cursor = node.walk();
242 for child in node.children(&mut cursor) {
243 if (child.kind().contains("identifier") || child.kind().ends_with("name"))
244 && let Ok(text) = child.utf8_text(source)
245 {
246 return text.trim().to_string();
247 }
248 }
249 node.kind().into()
250}
251
252fn collect_kind_text(node: Node<'_>, source: &[u8], kinds: &[&str]) -> Vec<String> {
253 let mut out = vec![];
254 collect_kind_text_inner(node, source, kinds, &mut out);
255 out
256}
257
258fn collect_kind_text_inner(node: Node<'_>, source: &[u8], kinds: &[&str], out: &mut Vec<String>) {
259 if kinds.iter().any(|kind| *kind == node.kind())
260 && let Ok(text) = node.utf8_text(source)
261 {
262 out.push(text.to_string());
263 }
264 let mut cursor = node.walk();
265 for child in node.children(&mut cursor) {
266 collect_kind_text_inner(child, source, kinds, out);
267 }
268}
269
270fn count_complexity(node: Node<'_>, spec: &LanguageSpec) -> u32 {
271 let mut count = if spec.branch_kinds.iter().any(|kind| *kind == node.kind()) {
272 1
273 } else {
274 0
275 };
276 let mut cursor = node.walk();
277 for child in node.children(&mut cursor) {
278 count += count_complexity(child, spec);
279 }
280 count
281}
282
283fn count_top_level_complexity(
284 root: Node<'_>,
285 _source: &[u8],
286 spec: &LanguageSpec,
287 symbol_ranges: &[(usize, usize)],
288) -> u32 {
289 collect_kind_nodes(root, spec.branch_kinds)
290 .into_iter()
291 .filter(|node| !inside_symbol(node.start_byte(), symbol_ranges))
292 .count() as u32
293}
294
295fn collect_kind_nodes<'a>(node: Node<'a>, kinds: &[&str]) -> Vec<Node<'a>> {
296 let mut out = vec![];
297 collect_kind_nodes_inner(node, kinds, &mut out);
298 out
299}
300
301fn collect_kind_nodes_inner<'a>(node: Node<'a>, kinds: &[&str], out: &mut Vec<Node<'a>>) {
302 if kinds.iter().any(|kind| *kind == node.kind()) {
303 out.push(node);
304 }
305 let mut cursor = node.walk();
306 for child in node.children(&mut cursor) {
307 collect_kind_nodes_inner(child, kinds, out);
308 }
309}
310
/// Reports whether `byte` falls inside any of the half-open
/// `(start, end)` ranges in `ranges` (start inclusive, end exclusive).
fn inside_symbol(byte: usize, ranges: &[(usize, usize)]) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&byte))
}
316
/// Extracts the imported module/path from the raw text of an import node.
///
/// Returns at most one target:
/// * If `raw` contains a double quote, the text between the first and second
///   double quote is the target (the rest of `raw` when there is no second
///   one). Otherwise the same is tried with single quotes.
/// * Otherwise `raw` is split on whitespace, `;` characters are dropped, and
///   the first token that is not one of the keywords `use`, `import` or
///   `from` is the target, with surrounding `{` / `}` trimmed.
///
/// Keywords are compared as whole tokens rather than removed as substrings,
/// so a name that merely ends in those letters (e.g. `house`) is left intact.
/// An empty vector is returned when no target remains.
fn extract_import_targets(raw: &str) -> Vec<String> {
    for quote in ['"', '\''] {
        if let Some(quoted) = raw.split(quote).nth(1) {
            return vec![quoted.to_string()];
        }
    }

    let first_token = raw
        .split_whitespace()
        .map(|token| token.replace(';', ""))
        .find(|token| !token.is_empty() && !matches!(token.as_str(), "use" | "import" | "from"));

    let Some(token) = first_token else {
        return Vec::new();
    };
    let target = token.trim_matches('{').trim_matches('}').trim();
    if target.is_empty() {
        Vec::new()
    } else {
        vec![target.to_string()]
    }
}
342
/// Derives a callee name from the raw text of a call node.
///
/// Takes the text before the first `(` (the whole text when there is none),
/// then keeps what follows the last `.` or `:` in it. Both steps trim
/// surrounding whitespace. Returns `None` when nothing is left.
fn call_name(raw: &str) -> Option<String> {
    let callee = raw
        .split_once('(')
        .map_or(raw, |(before, _)| before)
        .trim();
    let last_segment = callee
        .rsplit_once(['.', ':'])
        .map_or(callee, |(_, tail)| tail)
        .trim();
    (!last_segment.is_empty()).then(|| last_segment.to_string())
}
350}