1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless [`LanguageParser`] implementation for Rust source files.
pub struct RustParser;
10
11impl LanguageParser for RustParser {
12 fn language_name(&self) -> &str {
13 "rust"
14 }
15
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 let mut parser = Parser::new();
18 parser
19 .set_language(&tree_sitter_rust::LANGUAGE.into())
20 .ok()?;
21 let tree = parser.parse(&file.content, None)?;
22 let root = tree.root_node();
23 let src = file.content.as_bytes();
24
25 let mut ctx = ParseContext::new(src);
26 ctx.collect_nodes(root, false);
27
28 Some(SourceModel {
29 language: "rust".into(),
30 total_lines: file.line_count(),
31 functions: ctx.col.functions,
32 classes: ctx.col.classes,
33 imports: ctx.col.imports,
34 comments: collect_comments(root, src),
35 type_aliases: vec![], })
37 }
38}
39
/// Accumulates the items discovered while walking a file's syntax tree.
struct Collector {
    /// Free functions and impl methods, in the order they were extracted.
    functions: Vec<FunctionInfo>,
    /// Structs, enums and traits, in the order they were extracted.
    classes: Vec<ClassInfo>,
    /// `use` declarations, in the order they were extracted.
    imports: Vec<ImportInfo>,
}
46
/// Mutable state threaded through one traversal of a parsed file.
struct ParseContext<'a> {
    /// Raw bytes of the source file; node text is sliced out of this buffer.
    src: &'a [u8],
    /// Items collected so far.
    col: Collector,
    /// Highest per-method count of `self.` calls in the most recently scanned
    /// impl body (written by `scan_impl_body`).
    last_self_call_count: usize,
    /// Whether the most recently scanned impl body iterated over a callback
    /// field and called something (written by `scan_impl_body`).
    last_has_notify: bool,
    /// Type name -> names of its fields that look like callback collections.
    callback_fields: std::collections::HashMap<String, Vec<String>>,
}
56
57impl<'a> ParseContext<'a> {
58 fn new(src: &'a [u8]) -> Self {
59 Self {
60 src,
61 last_self_call_count: 0,
62 last_has_notify: false,
63 callback_fields: std::collections::HashMap::new(),
64 col: Collector {
65 functions: Vec::new(),
66 classes: Vec::new(),
67 imports: Vec::new(),
68 },
69 }
70 }
71
72 fn collect_nodes(&mut self, node: Node, exported: bool) {
73 let mut cursor = node.walk();
74 for child in node.children(&mut cursor) {
75 self.collect_single_node(child, exported);
76 }
77 }
78
79 fn collect_single_node(&mut self, child: Node, exported: bool) {
80 match child.kind() {
81 "function_item" => self.push_function(child, exported),
82 "impl_item" => self.extract_impl_methods(child),
83 "struct_item" | "enum_item" | "trait_item" => self.push_struct(child),
84 "use_declaration" => self.push_import(child),
85 _ => self.collect_nodes(child, false),
86 }
87 }
88
89 fn push_function(&mut self, node: Node, exported: bool) {
90 if let Some(mut f) = extract_function(node, self.src) {
91 f.is_exported = exported || has_pub(node);
92 self.col.functions.push(f);
93 }
94 }
95
96 fn push_struct(&mut self, node: Node) {
97 if let Some((mut c, cb_fields)) = extract_struct(node, self.src) {
98 c.is_exported = has_pub(node);
99 if !cb_fields.is_empty() {
100 self.callback_fields.insert(c.name.clone(), cb_fields);
101 }
102 self.col.classes.push(c);
103 }
104 }
105
106 fn push_import(&mut self, node: Node) {
107 if let Some(i) = extract_use(node, self.src) {
108 self.col.imports.push(i);
109 }
110 }
111
112 fn extract_impl_methods(&mut self, node: Node) {
113 let Some(body) = node.child_by_field_name("body") else {
114 return;
115 };
116 let impl_name = node
117 .child_by_field_name("type")
118 .map(|t| node_text(t, self.src).to_string());
119 let trait_name = node
120 .child_by_field_name("trait")
121 .map(|t| node_text(t, self.src).to_string());
122
123 let cb_fields = impl_name
124 .as_ref()
125 .and_then(|n| self.callback_fields.get(n))
126 .cloned()
127 .unwrap_or_default();
128
129 let (methods, delegating, has_behavior) = self.scan_impl_body(body, &cb_fields);
130
131 if let Some(name) = &impl_name
132 && let Some(class) = self.col.classes.iter_mut().find(|c| &c.name == name)
133 {
134 class.method_count += methods;
135 class.delegating_method_count += delegating;
136 class.has_behavior |= has_behavior;
137 class.self_call_count = class.self_call_count.max(self.last_self_call_count);
138 class.has_notify_method |= self.last_has_notify;
139 if let Some(t) = &trait_name {
140 class.parent_name = Some(t.clone());
141 }
142 }
143 }
144
145 fn scan_impl_body(&mut self, body: Node, cb_fields: &[String]) -> (usize, usize, bool) {
146 let mut methods = 0;
147 let mut delegating = 0;
148 let mut has_behavior = false;
149 let mut max_self_calls = 0;
150 let mut has_notify = false;
151 let mut cursor = body.walk();
152 for child in body.children(&mut cursor) {
153 if child.kind() == "function_item"
154 && let Some(mut f) = extract_function(child, self.src)
155 {
156 f.is_exported = has_pub(child);
157 methods += 1;
158 if f.is_delegating {
159 delegating += 1;
160 }
161 if f.line_count > 3 {
162 has_behavior = true;
163 }
164 let fn_body = child.child_by_field_name("body");
165 let self_calls = count_self_method_calls(fn_body, self.src);
166 max_self_calls = max_self_calls.max(self_calls);
167 if !has_notify && has_iterate_and_call(fn_body, self.src, cb_fields) {
169 has_notify = true;
170 }
171 self.col.functions.push(f);
172 }
173 }
174 self.last_self_call_count = max_self_calls;
176 self.last_has_notify = has_notify;
177 (methods, delegating, has_behavior)
178 }
179}
180
181fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
183 node.utf8_text(src).unwrap_or("")
184}
185
186fn has_pub(node: Node) -> bool {
187 let mut cursor = node.walk();
188 node.children(&mut cursor)
189 .any(|c| c.kind() == "visibility_modifier")
190}
191
192fn hash_ast_structure(node: Node) -> u64 {
193 let mut hasher = DefaultHasher::new();
194 walk_hash(node, &mut hasher);
195 hasher.finish()
196}
197
198fn walk_hash(node: Node, hasher: &mut DefaultHasher) {
199 node.kind().hash(hasher);
200 let mut cursor = node.walk();
201 for child in node.children(&mut cursor) {
202 walk_hash(child, hasher);
203 }
204}
205
206fn count_complexity(node: Node) -> usize {
207 let mut complexity = 1;
208 walk_complexity(node, &mut complexity);
209 complexity
210}
211
212fn walk_complexity(node: Node, count: &mut usize) {
213 match node.kind() {
214 "if_expression" | "else_clause" | "for_expression" | "while_expression"
215 | "loop_expression" | "match_arm" | "closure_expression" => {
216 *count += 1;
217 }
218 "binary_expression" => {
219 let mut cursor = node.walk();
220 for child in node.children(&mut cursor) {
221 if child.kind() == "&&" || child.kind() == "||" {
222 *count += 1;
223 }
224 }
225 }
226 _ => {}
227 }
228 let mut cursor = node.walk();
229 for child in node.children(&mut cursor) {
230 walk_complexity(child, count);
231 }
232}
233
234fn extract_function(node: Node, src: &[u8]) -> Option<FunctionInfo> {
235 let name_node = node.child_by_field_name("name")?;
236 let name = node_text(name_node, src).to_string();
237 let start_line = node.start_position().row + 1;
238 let end_line = node.end_position().row + 1;
239 let body = node.child_by_field_name("body");
240 let body_hash = body.map(hash_ast_structure);
241 let parameter_count = count_parameters(node);
242 let parameter_types = extract_param_types(node, src);
243 let chain_depth = body.map(max_chain_depth).unwrap_or(0);
244 let switch_arms = body.map(count_switch_arms).unwrap_or(0);
245 let external_refs = body
246 .map(|b| collect_external_refs(b, src))
247 .unwrap_or_default();
248 let is_delegating = body.map(|b| check_delegating(b, src)).unwrap_or(false);
249 Some(FunctionInfo {
250 name,
251 start_line,
252 end_line,
253 line_count: end_line - start_line + 1,
254 complexity: count_complexity(node),
255 body_hash,
256 is_exported: false,
257 parameter_count,
258 parameter_types,
259 chain_depth,
260 switch_arms,
261 external_refs,
262 is_delegating,
263 comment_lines: count_comment_lines(node, src),
264 referenced_fields: collect_field_refs(body, src),
265 null_check_fields: collect_null_checks(body, src),
266 switch_dispatch_target: extract_switch_target(body, src),
267 optional_param_count: count_optional_params(node, src),
268 called_functions: collect_calls_rs(body, src),
269 cognitive_complexity: body.map(cognitive_complexity_rs).unwrap_or(0),
270 })
271}
272
273fn extract_struct(node: Node, src: &[u8]) -> Option<(ClassInfo, Vec<String>)> {
274 let name_node = node.child_by_field_name("name")?;
275 let name = node_text(name_node, src).to_string();
276 let start_line = node.start_position().row + 1;
277 let end_line = node.end_position().row + 1;
278 let (field_count, field_names, callback_fields) = extract_fields(node, src);
279 let is_interface = node.kind() == "trait_item";
280 let has_listener_field = !callback_fields.is_empty();
281 Some((
282 ClassInfo {
283 name,
284 start_line,
285 end_line,
286 method_count: 0,
287 line_count: end_line - start_line + 1,
288 is_exported: false,
289 delegating_method_count: 0,
290 field_count,
291 field_names,
292 field_types: Vec::new(),
293 has_behavior: false,
294 is_interface,
295 parent_name: None,
296 override_count: 0,
297 self_call_count: 0,
298 has_listener_field,
299 has_notify_method: false,
300 },
301 callback_fields,
302 ))
303}
304
305fn count_parameters(node: Node) -> usize {
306 let params = match node.child_by_field_name("parameters") {
307 Some(p) => p,
308 None => return 0,
309 };
310 let mut cursor = params.walk();
311 params
312 .children(&mut cursor)
313 .filter(|c| c.kind() == "parameter" || c.kind() == "self_parameter")
314 .count()
315}
316
317fn extract_param_types(node: Node, src: &[u8]) -> Vec<String> {
318 let params = match node.child_by_field_name("parameters") {
319 Some(p) => p,
320 None => return vec![],
321 };
322 let mut types = Vec::new();
323 let mut cursor = params.walk();
324 for child in params.children(&mut cursor) {
325 if child.kind() == "parameter"
326 && let Some(ty) = child.child_by_field_name("type")
327 {
328 types.push(normalize_type(node_text(ty, src)));
329 }
330 }
331 types.sort();
332 types
333}
334
/// Reduces a parameter type to its core type name by peeling reference
/// wrappers: leading `&`, an optional lifetime (`'a`, `'static`) and an
/// optional `mut`, repeated for nested references.
///
/// `&str`, `&'a str` and `&'a mut str` all normalise to `str`. Types that
/// are not references are returned trimmed but otherwise unchanged.
fn normalize_type(raw: &str) -> String {
    let mut rest = raw.trim();
    while let Some(after_amp) = rest.strip_prefix('&') {
        rest = after_amp.trim_start();
        // Drop a lifetime such as `'a` so `&'a T` and `&T` compare equal.
        if let Some(after_tick) = rest.strip_prefix('\'') {
            rest = after_tick
                .trim_start_matches(|c: char| c.is_alphanumeric() || c == '_')
                .trim_start();
        }
        rest = rest.strip_prefix("mut ").map_or(rest, str::trim_start);
    }
    // Kept from the previous implementation: a bare leading `mut ` is dropped.
    rest.trim_start_matches("mut ").trim().to_string()
}
342
343fn max_chain_depth(node: Node) -> usize {
344 let mut max = 0;
345 walk_chain_depth(node, &mut max);
346 max
347}
348
349fn walk_chain_depth(node: Node, max: &mut usize) {
350 if node.kind() == "field_expression" {
351 let depth = measure_chain(node);
352 if depth > *max {
353 *max = depth;
354 }
355 }
356 let mut cursor = node.walk();
357 for child in node.children(&mut cursor) {
358 walk_chain_depth(child, max);
359 }
360}
361
362fn measure_chain(node: Node) -> usize {
364 let mut depth = 0;
365 let mut current = node;
366 while current.kind() == "field_expression" {
367 depth += 1;
368 if let Some(obj) = current.child_by_field_name("value") {
369 current = obj;
370 } else {
371 break;
372 }
373 }
374 depth
375}
376
377fn count_switch_arms(node: Node) -> usize {
378 let mut count = 0;
379 walk_switch_arms(node, &mut count);
380 count
381}
382
383fn walk_switch_arms(node: Node, count: &mut usize) {
384 if node.kind() == "match_arm" {
385 *count += 1;
386 }
387 let mut cursor = node.walk();
388 for child in node.children(&mut cursor) {
389 walk_switch_arms(child, count);
390 }
391}
392
393fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
394 let mut refs = Vec::new();
395 walk_external_refs(node, src, &mut refs);
396 refs.sort();
397 refs.dedup();
398 refs
399}
400
401fn field_chain_root(node: Node) -> Node {
403 let mut current = node;
404 while current.kind() == "field_expression" {
405 match current.child_by_field_name("value") {
406 Some(child) => current = child,
407 None => break,
408 }
409 }
410 current
411}
412
413fn walk_external_refs(node: Node, src: &[u8], refs: &mut Vec<String>) {
414 if node.kind() == "field_expression" {
415 let root = field_chain_root(node);
417 let text = node_text(root, src);
418 if text != "self" && !text.is_empty() {
419 refs.push(text.to_string());
420 }
421 }
422 let mut cursor = node.walk();
423 for child in node.children(&mut cursor) {
424 walk_external_refs(child, src, refs);
425 }
426}
427
428fn single_stmt(body: Node) -> Option<Node> {
430 let mut cursor = body.walk();
431 let stmts: Vec<_> = body
432 .children(&mut cursor)
433 .filter(|c| c.kind() != "{" && c.kind() != "}")
434 .collect();
435 (stmts.len() == 1).then(|| stmts[0])
436}
437
438fn is_external_call(node: Node, src: &[u8]) -> bool {
440 node.kind() == "call_expression"
441 && node.child_by_field_name("function").is_some_and(|func| {
442 func.kind() == "field_expression"
443 && func
444 .child_by_field_name("value")
445 .is_some_and(|obj| node_text(obj, src) != "self")
446 })
447}
448
449fn check_delegating(body: Node, src: &[u8]) -> bool {
450 let Some(stmt) = single_stmt(body) else {
451 return false;
452 };
453 let expr = match stmt.kind() {
454 "expression_statement" => stmt.child(0).unwrap_or(stmt),
455 "return_expression" => stmt.child(1).unwrap_or(stmt),
456 _ => stmt,
457 };
458 is_external_call(expr, src)
459}
460
461fn extract_use(node: Node, src: &[u8]) -> Option<ImportInfo> {
462 let text = node_text(node, src);
463 let source = text
465 .strip_prefix("use ")?
466 .trim_end_matches(';')
467 .trim()
468 .to_string();
469 Some(ImportInfo {
470 source,
471 line: node.start_position().row + 1,
472 })
473}
474
475fn count_comment_lines(node: Node, src: &[u8]) -> usize {
477 let mut count = 0;
478 let mut cursor = node.walk();
479 for child in node.children(&mut cursor) {
480 if child.kind() == "line_comment" || child.kind() == "block_comment" {
481 count += child.end_position().row - child.start_position().row + 1;
482 }
483 }
484 if let Some(body) = node.child_by_field_name("body") {
486 count += count_comment_lines_recursive(body, src);
487 }
488 count
489}
490
491fn count_comment_lines_recursive(node: Node, _src: &[u8]) -> usize {
492 let mut count = 0;
493 let mut cursor = node.walk();
494 for child in node.children(&mut cursor) {
495 if child.kind() == "line_comment" || child.kind() == "block_comment" {
496 count += child.end_position().row - child.start_position().row + 1;
497 } else if child.child_count() > 0 {
498 count += count_comment_lines_recursive(child, _src);
499 }
500 }
501 count
502}
503
504fn collect_field_refs(body: Option<Node>, src: &[u8]) -> Vec<String> {
507 let Some(body) = body else { return vec![] };
508 let mut refs = Vec::new();
509 collect_self_fields(body, src, &mut refs);
510 refs.sort();
511 refs.dedup();
512 refs
513}
514
515fn collect_self_fields(node: Node, src: &[u8], refs: &mut Vec<String>) {
516 if node.kind() == "field_expression"
517 && let Some(obj) = node.child_by_field_name("value")
518 && node_text(obj, src) == "self"
519 && let Some(field) = node.child_by_field_name("field")
520 {
521 refs.push(node_text(field, src).to_string());
522 }
523 let mut cursor = node.walk();
524 for child in node.children(&mut cursor) {
525 collect_self_fields(child, src, refs);
526 }
527}
528
529fn extract_fields(node: Node, src: &[u8]) -> (usize, Vec<String>, Vec<String>) {
532 let mut names = Vec::new();
533 let mut callback_fields = Vec::new();
534 if let Some(body) = node.child_by_field_name("body") {
535 let mut cursor = body.walk();
536 for child in body.children(&mut cursor) {
537 if child.kind() == "field_declaration"
538 && let Some(name_node) = child.child_by_field_name("name")
539 {
540 let name = node_text(name_node, src).to_string();
541 if let Some(ty) = child.child_by_field_name("type")
542 && is_callback_collection_type_rs(node_text(ty, src))
543 {
544 callback_fields.push(name.clone());
545 }
546 names.push(name);
547 }
548 }
549 }
550 (names.len(), names, callback_fields)
551}
552
/// True when the type text mentions `Vec<` together with a callable marker
/// (`Fn(`, `FnMut(`, `FnOnce(` or `fn(`). This is a plain substring test.
fn is_callback_collection_type_rs(ty: &str) -> bool {
    const CALLABLE_MARKERS: [&str; 4] = ["Fn(", "FnMut(", "FnOnce(", "fn("];
    ty.contains("Vec<") && CALLABLE_MARKERS.iter().any(|marker| ty.contains(*marker))
}
560
561fn collect_null_checks(body: Option<Node>, src: &[u8]) -> Vec<String> {
563 let Some(body) = body else { return vec![] };
564 let mut fields = Vec::new();
565 walk_null_checks_rs(body, src, &mut fields);
566 fields.sort();
567 fields.dedup();
568 fields
569}
570
571fn walk_null_checks_rs(node: Node, src: &[u8], fields: &mut Vec<String>) {
572 if node.kind() == "if_let_expression" {
573 if let Some(pattern) = node.child_by_field_name("pattern")
575 && node_text(pattern, src).contains("Some")
576 && let Some(value) = node.child_by_field_name("value")
577 {
578 let vtext = node_text(value, src);
579 if let Some(f) = vtext.strip_prefix("self.") {
580 fields.push(f.to_string());
581 }
582 }
583 } else if node.kind() == "if_expression"
584 && let Some(cond) = node.child_by_field_name("condition")
585 {
586 let text = node_text(cond, src);
587 if text.contains("is_some") || text.contains("is_none") {
588 extract_null_checked_fields(text, fields);
589 }
590 }
591 let mut cursor = node.walk();
592 for child in node.children(&mut cursor) {
593 walk_null_checks_rs(child, src, fields);
594 }
595}
596
/// Appends to `fields` the identifier following each `self.` in `text`,
/// provided `text` mentions `is_some`, `is_none` or `Some` at all.
///
/// Only text *after* a `self.` is considered, so locals such as `x` in
/// `x.is_some() && self.foo.is_none()` are not reported. The identifiers
/// `is_some` / `is_none` themselves are never reported.
fn extract_null_checked_fields(text: &str, fields: &mut Vec<String>) {
    let mentions_option_check = ["is_some", "is_none", "Some"]
        .iter()
        .any(|needle| text.contains(*needle));
    if !mentions_option_check {
        return;
    }
    // `split` yields the text before the first `self.` as its first item;
    // that segment is not a field access, so skip it.
    for segment in text.split("self.").skip(1) {
        let field = segment
            .split(|c: char| !c.is_alphanumeric() && c != '_')
            .next()
            .unwrap_or("");
        if !field.is_empty() && field != "is_some" && field != "is_none" {
            fields.push(field.to_string());
        }
    }
}
614
615fn extract_switch_target(body: Option<Node>, src: &[u8]) -> Option<String> {
617 let body = body?;
618 find_match_target(body, src)
619}
620
621fn find_match_target(node: Node, src: &[u8]) -> Option<String> {
622 if node.kind() == "match_expression"
623 && let Some(value) = node.child_by_field_name("value")
624 {
625 return Some(node_text(value, src).to_string());
626 }
627 let mut cursor = node.walk();
628 for child in node.children(&mut cursor) {
629 if let Some(t) = find_match_target(child, src) {
630 return Some(t);
631 }
632 }
633 None
634}
635
636fn count_optional_params(node: Node, src: &[u8]) -> usize {
638 let Some(params) = node.child_by_field_name("parameters") else {
639 return 0;
640 };
641 let mut count = 0;
642 let mut cursor = params.walk();
643 for child in params.children(&mut cursor) {
644 if child.kind() == "parameter" {
645 let text = node_text(child, src);
646 if text.contains("Option<") {
647 count += 1;
648 }
649 }
650 }
651 count
652}
653
654fn count_self_method_calls(body: Option<Node>, src: &[u8]) -> usize {
656 let Some(body) = body else { return 0 };
657 let mut count = 0;
658 walk_self_calls(body, src, &mut count);
659 count
660}
661
662fn walk_self_calls(node: Node, src: &[u8], count: &mut usize) {
663 if node.kind() == "call_expression"
664 && let Some(func) = node.child_by_field_name("function")
665 && node_text(func, src).starts_with("self.")
666 {
667 *count += 1;
668 }
669 let mut cursor = node.walk();
670 for child in node.children(&mut cursor) {
671 walk_self_calls(child, src, count);
672 }
673}
674
675fn has_iterate_and_call(body: Option<Node>, src: &[u8], cb_fields: &[String]) -> bool {
678 let Some(body) = body else { return false };
679 for field in cb_fields {
680 let self_field = format!("self.{field}");
681 if walk_for_iterate_call(body, src, &self_field) {
682 return true;
683 }
684 }
685 false
686}
687
688fn walk_for_iterate_call(node: Node, src: &[u8], self_field: &str) -> bool {
689 if node.kind() == "for_expression"
691 && let Some(value) = node.child_by_field_name("value")
692 && node_text(value, src).contains(self_field)
693 && let Some(loop_body) = node.child_by_field_name("body")
694 && has_call_expression(loop_body)
695 {
696 return true;
697 }
698 if node.kind() == "call_expression" {
700 let text = node_text(node, src);
701 if text.contains(self_field) && text.contains("for_each") {
702 return true;
703 }
704 }
705 let mut cursor = node.walk();
706 for child in node.children(&mut cursor) {
707 if walk_for_iterate_call(child, src, self_field) {
708 return true;
709 }
710 }
711 false
712}
713
714fn cognitive_complexity_rs(node: Node) -> usize {
715 let mut score = 0;
716 cc_walk_rs(node, 0, &mut score);
717 score
718}
719
720fn cc_walk_rs(node: Node, nesting: usize, score: &mut usize) {
721 match node.kind() {
722 "if_expression" => {
723 *score += 1 + nesting;
724 cc_children_rs(node, nesting + 1, score);
725 return;
726 }
727 "for_expression" | "while_expression" | "loop_expression" => {
728 *score += 1 + nesting;
729 cc_children_rs(node, nesting + 1, score);
730 return;
731 }
732 "match_expression" => {
733 *score += 1 + nesting;
734 cc_children_rs(node, nesting + 1, score);
735 return;
736 }
737 "else_clause" => {
738 *score += 1;
739 }
740 "binary_expression" => {
741 if let Some(op) = node.child_by_field_name("operator")
742 && (op.kind() == "&&" || op.kind() == "||")
743 {
744 *score += 1;
745 }
746 }
747 "closure_expression" => {
748 cc_children_rs(node, nesting + 1, score);
749 return;
750 }
751 _ => {}
752 }
753 cc_children_rs(node, nesting, score);
754}
755
756fn cc_children_rs(node: Node, nesting: usize, score: &mut usize) {
757 let mut cursor = node.walk();
758 for child in node.children(&mut cursor) {
759 cc_walk_rs(child, nesting, score);
760 }
761}
762
763fn collect_calls_rs(body: Option<tree_sitter::Node>, src: &[u8]) -> Vec<String> {
764 let Some(body) = body else { return Vec::new() };
765 let mut calls = Vec::new();
766 let mut cursor = body.walk();
767 visit_all(body, &mut cursor, &mut |n| {
768 if n.kind() == "call_expression"
769 && let Some(func) = n.child(0)
770 {
771 let name = node_text(func, src).to_string();
772 if !calls.contains(&name) {
773 calls.push(name);
774 }
775 }
776 });
777 calls
778}
779
780fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
781 f(node);
782 if cursor.goto_first_child() {
783 loop {
784 visit_all(cursor.node(), cursor, f);
785 if !cursor.goto_next_sibling() {
786 break;
787 }
788 }
789 cursor.goto_parent();
790 }
791}
792
793fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
794 let mut comments = Vec::new();
795 let mut cursor = root.walk();
796 visit_all(root, &mut cursor, &mut |n| {
797 if n.kind().contains("comment") {
798 comments.push(cha_core::CommentInfo {
799 text: node_text(n, src).to_string(),
800 line: n.start_position().row + 1,
801 });
802 }
803 });
804 comments
805}
806
807fn has_call_expression(node: Node) -> bool {
808 if node.kind() == "call_expression" {
809 return true;
810 }
811 let mut cursor = node.walk();
812 for child in node.children(&mut cursor) {
813 if has_call_expression(child) {
814 return true;
815 }
816 }
817 false
818}