1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Parser for C sources; its [`LanguageParser`] impl reports the language as `"c"`.
pub struct CParser;
/// Parser for C++ sources; its [`LanguageParser`] impl reports the language as `"cpp"`.
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 let imports_map = crate::c_imports::build(root, src);
47 collect_top_level(
48 root,
49 src,
50 &imports_map,
51 &mut functions,
52 &mut classes,
53 &mut imports,
54 &mut type_aliases,
55 );
56
57 if is_header_file(file) {
62 for f in &mut functions {
63 f.is_exported = true;
64 }
65 }
66
67 Some(SourceModel {
68 language: lang.into(),
69 total_lines: file.line_count(),
70 functions,
71 classes,
72 imports,
73 comments: collect_comments(root, src),
74 type_aliases,
75 })
76}
77
78fn is_header_file(file: &SourceFile) -> bool {
79 file.path
80 .extension()
81 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
82}
83
84fn collect_top_level(
87 root: Node,
88 src: &[u8],
89 imports_map: &crate::type_ref::ImportsMap,
90 functions: &mut Vec<FunctionInfo>,
91 classes: &mut Vec<ClassInfo>,
92 imports: &mut Vec<ImportInfo>,
93 type_aliases: &mut Vec<(String, String)>,
94) {
95 let mut cursor = root.walk();
96 for child in root.children(&mut cursor) {
97 match child.kind() {
98 "function_definition" => {
99 handle_function_definition(child, src, imports_map, functions, classes);
100 }
101 "declaration" => {
102 if has_function_declarator(child)
110 && let Some(f) = extract_function(child, src, imports_map)
111 {
112 functions.push(f);
113 }
114 }
115 "struct_specifier" | "class_specifier" => {
116 if let Some(c) = extract_class(child, src) {
117 classes.push(c);
118 }
119 }
120 "type_definition" => {
121 extract_typedef_struct(child, src, classes, type_aliases);
122 }
123 "preproc_include" => {
124 if let Some(imp) = extract_include(child, src) {
125 imports.push(imp);
126 }
127 }
128 "namespace_definition" | "linkage_specification" | "template_declaration" => {
135 collect_top_level(
136 child,
137 src,
138 imports_map,
139 functions,
140 classes,
141 imports,
142 type_aliases,
143 );
144 }
145 _ => {
146 if child.child_count() > 0 {
147 collect_top_level(
148 child,
149 src,
150 imports_map,
151 functions,
152 classes,
153 imports,
154 type_aliases,
155 );
156 }
157 }
158 }
159 }
160}
161
162fn handle_function_definition(
166 node: Node,
167 src: &[u8],
168 imports_map: &crate::type_ref::ImportsMap,
169 functions: &mut Vec<FunctionInfo>,
170 classes: &mut Vec<ClassInfo>,
171) {
172 if let Some(c) = try_extract_macro_class(node, src) {
173 classes.push(c);
174 return;
175 }
176 let Some(f) = extract_function(node, src, imports_map) else {
177 return;
178 };
179 if let Some(q) = crate::cpp::extract_class_qualifier(node, src) {
180 crate::cpp::attach_to_class(&q, classes);
181 }
182 functions.push(f);
183}
184
185fn extract_typedef_struct(
186 node: Node,
187 src: &[u8],
188 classes: &mut Vec<ClassInfo>,
189 type_aliases: &mut Vec<(String, String)>,
190) {
191 let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
192 if !found_struct {
193 register_simple_typedef(node, src, type_aliases);
194 }
195}
196
197fn register_typedef_struct_children(
198 node: Node,
199 src: &[u8],
200 classes: &mut Vec<ClassInfo>,
201 type_aliases: &mut Vec<(String, String)>,
202) -> bool {
203 let mut found_struct = false;
204 let mut inner = node.walk();
205 for sub in node.children(&mut inner) {
206 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
207 continue;
208 }
209 found_struct = true;
210 register_single_typedef_struct(node, sub, src, classes, type_aliases);
211 }
212 found_struct
213}
214
215fn register_single_typedef_struct(
216 typedef: Node,
217 sub: Node,
218 src: &[u8],
219 classes: &mut Vec<ClassInfo>,
220 type_aliases: &mut Vec<(String, String)>,
221) {
222 let Some(mut c) = extract_class(sub, src) else {
223 return;
224 };
225 let original_name = c.name.clone();
226 if c.name.is_empty()
227 && let Some(decl) = typedef.child_by_field_name("declarator")
228 {
229 c.name = node_text(decl, src).to_string();
230 }
231 if !original_name.is_empty()
232 && let Some(decl) = typedef.child_by_field_name("declarator")
233 {
234 let alias = node_text(decl, src).to_string();
235 if alias != original_name {
236 type_aliases.push((alias, original_name));
237 }
238 }
239 if !c.name.is_empty() {
240 classes.push(c);
241 }
242}
243
244fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
246 let alias = extract_typedef_alias(node, src);
247 let original = node
248 .child_by_field_name("type")
249 .map(|t| node_text(t, src).trim().to_string())
250 .unwrap_or_default();
251 if !alias.is_empty() && alias != original {
252 type_aliases.push((alias, original));
253 }
254}
255
256fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
261 if let Some(decl) = node.child_by_field_name("declarator") {
262 return node_text(decl, src).trim().to_string();
263 }
264 let mut cursor = node.walk();
265 for child in node.children(&mut cursor) {
266 if child.kind() == "type_identifier" {
267 return node_text(child, src).trim().to_string();
268 }
269 }
270 String::new()
271}
272
273fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
276 let mut has_class_spec = false;
277 let mut cursor = node.walk();
278 for child in node.children(&mut cursor) {
279 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
280 has_class_spec = true;
281 }
282 }
283 if !has_class_spec {
284 return None;
285 }
286 let name_node = node
288 .child_by_field_name("declarator")
289 .filter(|d| d.kind() == "identifier")?;
290 let name = node_text(name_node, src).to_string();
291 let name_col = name_node.start_position().column;
292 let name_end_col = name_node.end_position().column;
293 let body = node.child_by_field_name("body")?;
294 let start_line = node.start_position().row + 1;
295 let end_line = node.end_position().row + 1;
296 let method_count = count_methods(body);
297 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
298
299 let parent_name = first_field_type;
301
302 Some(ClassInfo {
303 name,
304 start_line,
305 end_line,
306 name_col,
307 name_end_col,
308 line_count: end_line - start_line + 1,
309 method_count,
310 is_exported: true,
311 delegating_method_count: 0,
312 field_count: field_names.len(),
313 field_names,
314 field_types,
315 has_behavior: method_count > 0,
316 is_interface: false,
317 parent_name,
318 override_count: 0,
319 self_call_count: 0,
320 has_listener_field: false,
321 has_notify_method: false,
322 })
323}
324
325fn extract_function(
326 node: Node,
327 src: &[u8],
328 imports_map: &crate::type_ref::ImportsMap,
329) -> Option<FunctionInfo> {
330 let declarator = node.child_by_field_name("declarator")?;
331 let name_node = find_func_name_node(declarator)?;
332 let name = node_text(name_node, src).to_string();
333 let name_col = name_node.start_position().column;
334 let name_end_col = name_node.end_position().column;
335 let start_line = node.start_position().row + 1;
336 let end_line = node.end_position().row + 1;
337 let body = node.child_by_field_name("body");
338 let (param_count, param_types) = extract_params(declarator, src, imports_map);
339 let is_static = has_storage_class(node, src, "static");
340
341 Some(FunctionInfo {
342 name,
343 start_line,
344 end_line,
345 name_col,
346 name_end_col,
347 line_count: end_line - start_line + 1,
348 complexity: count_complexity(node),
349 body_hash: body.map(hash_ast),
350 is_exported: !is_static,
351 parameter_count: param_count,
352 parameter_types: param_types,
353 chain_depth: body.map(max_chain_depth).unwrap_or(0),
354 switch_arms: body.map(count_case_labels).unwrap_or(0),
355 external_refs: body
356 .map(|b| collect_external_refs_c(b, src))
357 .unwrap_or_default(),
358 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
359 comment_lines: count_comment_lines(node, src),
360 referenced_fields: body
361 .map(|b| collect_field_refs_c(b, src))
362 .unwrap_or_default(),
363 null_check_fields: body
364 .map(|b| collect_null_checks_c(b, src))
365 .unwrap_or_default(),
366 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
367 optional_param_count: 0,
368 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
369 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
370 return_type: extract_c_return_type(node, src, imports_map),
371 })
372}
373
374fn extract_c_return_type(
379 node: Node,
380 src: &[u8],
381 imports_map: &crate::type_ref::ImportsMap,
382) -> Option<cha_core::TypeRef> {
383 let ty = node.child_by_field_name("type")?;
384 let base = node_text(ty, src).trim().to_string();
385 let is_ptr = node
386 .child_by_field_name("declarator")
387 .is_some_and(|d| d.kind() == "pointer_declarator");
388 let raw = if is_ptr { format!("{base} *") } else { base };
389 Some(crate::type_ref::resolve(raw, imports_map))
390}
391
392fn has_function_declarator(node: Node) -> bool {
396 node.child_by_field_name("declarator")
397 .is_some_and(has_function_declarator_inside)
398}
399
400fn has_function_declarator_inside(node: Node) -> bool {
401 if node.kind() == "function_declarator" {
402 return true;
403 }
404 if let Some(inner) = node.child_by_field_name("declarator") {
407 return has_function_declarator_inside(inner);
408 }
409 false
410}
411
412fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
414 for i in 0..node.child_count() {
415 if let Some(child) = node.child(i)
416 && child.kind() == "storage_class_specifier"
417 && node_text(child, src) == keyword
418 {
419 return true;
420 }
421 }
422 false
423}
424
425fn find_func_name_node(declarator: Node) -> Option<Node> {
426 match declarator.kind() {
430 "identifier" | "field_identifier" | "destructor_name" | "operator_name" => {
431 return Some(declarator);
432 }
433 "qualified_identifier" => return crate::cpp::qualified_identifier_leaf(declarator),
434 _ => {}
435 }
436 let next = declarator
440 .child_by_field_name("declarator")
441 .or_else(|| first_named_child(declarator));
442 next.and_then(find_func_name_node)
443}
444
445fn first_named_child(node: Node) -> Option<Node> {
446 let mut c = node.walk();
447 node.children(&mut c).find(|n| n.is_named())
448}
449
450fn extract_params(
451 declarator: Node,
452 src: &[u8],
453 imports_map: &crate::type_ref::ImportsMap,
454) -> (usize, Vec<cha_core::TypeRef>) {
455 let params = match declarator.child_by_field_name("parameters") {
456 Some(p) => p,
457 None => return (0, vec![]),
458 };
459 let mut count = 0;
460 let mut types = Vec::new();
461 let mut cursor = params.walk();
462 for child in params.children(&mut cursor) {
463 if child.kind() == "parameter_declaration" {
464 count += 1;
465 let base = child
466 .child_by_field_name("type")
467 .map(|t| node_text(t, src).to_string())
468 .unwrap_or_else(|| "int".into());
469 let is_ptr = child
470 .child_by_field_name("declarator")
471 .is_some_and(|d| d.kind() == "pointer_declarator");
472 let raw = if is_ptr { format!("{base} *") } else { base };
473 types.push(crate::type_ref::resolve(raw, imports_map));
474 }
475 }
476 (count, types)
477}
478
479fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
480 let (name, name_col, name_end_col) =
481 crate::cpp::class_name_triple(node.child_by_field_name("name"), src);
482 let start_line = node.start_position().row + 1;
483 let end_line = node.end_position().row + 1;
484 let body = node.child_by_field_name("body");
485 let method_count = body.map(count_methods).unwrap_or(0);
486 let (field_names, field_types, first_field_type) =
487 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
488
489 let parent_name = crate::cpp::extract_cpp_base(node, src).or(first_field_type);
495
496 Some(ClassInfo {
497 name,
498 start_line,
499 end_line,
500 name_col,
501 name_end_col,
502 line_count: end_line - start_line + 1,
503 method_count,
504 is_exported: true,
505 delegating_method_count: 0,
506 field_count: field_names.len(),
507 field_names,
508 field_types,
509 has_behavior: method_count > 0,
510 is_interface: false,
511 parent_name,
512 override_count: 0,
513 self_call_count: 0,
514 has_listener_field: false,
515 has_notify_method: false,
516 })
517}
518
519fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
520 let mut names = Vec::new();
521 let mut types = Vec::new();
522 let mut first_type = None;
523 let mut cursor = body.walk();
524 for child in body.children(&mut cursor) {
525 if child.kind() == "field_declaration" {
526 if let Some(decl) = child.child_by_field_name("declarator") {
527 names.push(node_text(decl, src).to_string());
528 }
529 let ty = child
530 .child_by_field_name("type")
531 .map(|t| node_text(t, src).to_string());
532 if first_type.is_none() {
533 first_type = ty.clone();
534 }
535 types.push(ty.unwrap_or_default());
536 }
537 }
538 (names, types, first_type)
539}
540
541fn count_methods(body: Node) -> usize {
542 let mut count = 0;
543 let mut cursor = body.walk();
544 for child in body.children(&mut cursor) {
545 if child.kind() == "function_definition" || child.kind() == "declaration" {
546 count += 1;
547 }
548 }
549 count
550}
551
552fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
553 let path = node.child_by_field_name("path")?;
554 let text = node_text(path, src)
555 .trim_matches(|c| c == '"' || c == '<' || c == '>')
556 .to_string();
557 Some(ImportInfo {
558 source: text,
559 line: node.start_position().row + 1,
560 col: node.start_position().column,
561 ..Default::default()
562 })
563}
564
565fn count_complexity(node: Node) -> usize {
566 let mut c = 1usize;
567 let mut cursor = node.walk();
568 visit_all(node, &mut cursor, &mut |n| match n.kind() {
569 "if_statement"
570 | "for_statement"
571 | "while_statement"
572 | "do_statement"
573 | "case_statement"
574 | "catch_clause"
575 | "conditional_expression" => c += 1,
576 "binary_expression" => {
577 if let Some(op) = n.child_by_field_name("operator") {
578 let kind = op.kind();
579 if kind == "&&" || kind == "||" {
580 c += 1;
581 }
582 }
583 }
584 _ => {}
585 });
586 c
587}
588
589fn max_chain_depth(node: Node) -> usize {
590 let mut max = 0;
591 let mut cursor = node.walk();
592 visit_all(node, &mut cursor, &mut |n| {
593 if n.kind() == "field_expression" {
594 let d = chain_len(n);
595 if d > max {
596 max = d;
597 }
598 }
599 });
600 max
601}
602
603fn chain_len(node: Node) -> usize {
604 let mut depth = 0;
605 let mut current = node;
606 while current.kind() == "field_expression" || current.kind() == "call_expression" {
607 if current.kind() == "field_expression" {
608 depth += 1;
609 }
610 match current.child(0) {
611 Some(c) => current = c,
612 None => break,
613 }
614 }
615 depth
616}
617
618fn count_case_labels(node: Node) -> usize {
619 let mut count = 0;
620 let mut cursor = node.walk();
621 visit_all(node, &mut cursor, &mut |n| {
622 if n.kind() == "case_statement" {
623 count += 1;
624 }
625 });
626 count
627}
628
629fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
630 let mut score = 0;
631 cc_walk_c(node, 0, &mut score);
632 score
633}
634
635fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
636 match node.kind() {
637 "if_statement" => {
638 *score += 1 + nesting;
639 cc_children_c(node, nesting + 1, score);
640 return;
641 }
642 "for_statement" | "while_statement" | "do_statement" => {
643 *score += 1 + nesting;
644 cc_children_c(node, nesting + 1, score);
645 return;
646 }
647 "switch_statement" => {
648 *score += 1 + nesting;
649 cc_children_c(node, nesting + 1, score);
650 return;
651 }
652 "else_clause" => {
653 *score += 1;
654 }
655 "binary_expression" => {
656 if let Some(op) = node.child_by_field_name("operator")
657 && (op.kind() == "&&" || op.kind() == "||")
658 {
659 *score += 1;
660 }
661 }
662 "catch_clause" => {
663 *score += 1 + nesting;
664 cc_children_c(node, nesting + 1, score);
665 return;
666 }
667 _ => {}
668 }
669 cc_children_c(node, nesting, score);
670}
671
672fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
673 let mut cursor = node.walk();
674 for child in node.children(&mut cursor) {
675 cc_walk_c(child, nesting, score);
676 }
677}
678
679fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
680 let mut refs = Vec::new();
681 let mut cursor = body.walk();
682 visit_all(body, &mut cursor, &mut |n| {
683 if n.kind() == "field_expression"
684 && let Some(obj) = n.child(0)
685 && obj.kind() == "identifier"
686 {
687 let name = node_text(obj, src).to_string();
688 if !refs.contains(&name) {
689 refs.push(name);
690 }
691 }
692 });
693 refs
694}
695
696fn check_delegating_c(body: Node, src: &[u8]) -> bool {
697 let mut cursor = body.walk();
698 let stmts: Vec<Node> = body
699 .children(&mut cursor)
700 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
701 .collect();
702 if stmts.len() != 1 {
703 return false;
704 }
705 let stmt = stmts[0];
706 let call = match stmt.kind() {
707 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
708 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
709 _ => None,
710 };
711 call.and_then(|c| c.child(0))
712 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
713}
714
715fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
716 let mut refs = Vec::new();
717 let mut cursor = body.walk();
718 visit_all(body, &mut cursor, &mut |n| {
719 if n.kind() == "field_expression"
720 && let Some(field) = n.child_by_field_name("field")
721 {
722 let name = node_text(field, src).to_string();
723 if !refs.contains(&name) {
724 refs.push(name);
725 }
726 }
727 });
728 refs
729}
730
731fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
732 let mut fields = Vec::new();
733 let mut cursor = body.walk();
734 visit_all(body, &mut cursor, &mut |n| {
735 if n.kind() == "binary_expression" {
736 let text = node_text(n, src);
737 if (text.contains("NULL") || text.contains("nullptr"))
738 && let Some(left) = n.child(0)
739 {
740 let name = node_text(left, src).to_string();
741 if !fields.contains(&name) {
742 fields.push(name);
743 }
744 }
745 }
746 });
747 fields
748}
749
750fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
751 let mut cursor = body.walk();
752 let mut target = None;
753 visit_all(body, &mut cursor, &mut |n| {
754 if n.kind() == "switch_statement"
755 && target.is_none()
756 && let Some(cond) = n.child_by_field_name("condition")
757 {
758 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
759 }
760 });
761 target
762}
763
764fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
765 let mut calls = Vec::new();
766 let mut cursor = body.walk();
767 visit_all(body, &mut cursor, &mut |n| {
768 if n.kind() == "call_expression"
769 && let Some(func) = n.child(0)
770 {
771 let name = node_text(func, src).to_string();
772 if !calls.contains(&name) {
773 calls.push(name);
774 }
775 }
776 });
777 calls
778}
779
780fn count_comment_lines(node: Node, src: &[u8]) -> usize {
781 let mut count = 0;
782 let mut cursor = node.walk();
783 visit_all(node, &mut cursor, &mut |n| {
784 if n.kind() == "comment" {
785 count += node_text(n, src).lines().count();
786 }
787 });
788 count
789}
790
791fn hash_ast(node: Node) -> u64 {
792 let mut hasher = DefaultHasher::new();
793 hash_node(node, &mut hasher);
794 hasher.finish()
795}
796
797fn hash_node(node: Node, hasher: &mut DefaultHasher) {
798 node.kind().hash(hasher);
799 let mut cursor = node.walk();
800 for child in node.children(&mut cursor) {
801 hash_node(child, hasher);
802 }
803}
804
805fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
806 node.utf8_text(src).unwrap_or("")
807}
808
809fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
810 let mut comments = Vec::new();
811 let mut cursor = root.walk();
812 visit_all(root, &mut cursor, &mut |n| {
813 if n.kind().contains("comment") {
814 comments.push(cha_core::CommentInfo {
815 text: node_text(n, src).to_string(),
816 line: n.start_position().row + 1,
817 });
818 }
819 });
820 comments
821}
822
823fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
824 f(node);
825 if cursor.goto_first_child() {
826 loop {
827 let child_node = cursor.node();
828 let mut child_cursor = child_node.walk();
829 visit_all(child_node, &mut child_cursor, f);
830 if !cursor.goto_next_sibling() {
831 break;
832 }
833 }
834 cursor.goto_parent();
835 }
836}