1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Zero-sized parser handle for C sources.
///
/// Derives the common traits so the handle can be logged, copied freely and
/// created through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CParser;
/// Zero-sized parser handle for C++ sources.
///
/// Derives the common traits so the handle can be logged, copied freely and
/// created through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 let imports_map = crate::c_imports::build(root, src);
47 collect_top_level(
48 root,
49 src,
50 &imports_map,
51 &mut functions,
52 &mut classes,
53 &mut imports,
54 &mut type_aliases,
55 );
56
57 if is_header_file(file) {
62 for f in &mut functions {
63 f.is_exported = true;
64 }
65 }
66
67 Some(SourceModel {
68 language: lang.into(),
69 total_lines: file.line_count(),
70 functions,
71 classes,
72 imports,
73 comments: collect_comments(root, src),
74 type_aliases,
75 })
76}
77
78fn is_header_file(file: &SourceFile) -> bool {
79 file.path
80 .extension()
81 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
82}
83
84fn collect_top_level(
87 root: Node,
88 src: &[u8],
89 imports_map: &crate::type_ref::ImportsMap,
90 functions: &mut Vec<FunctionInfo>,
91 classes: &mut Vec<ClassInfo>,
92 imports: &mut Vec<ImportInfo>,
93 type_aliases: &mut Vec<(String, String)>,
94) {
95 let mut cursor = root.walk();
96 for child in root.children(&mut cursor) {
97 match child.kind() {
98 "function_definition" => {
99 if let Some(c) = try_extract_macro_class(child, src) {
103 classes.push(c);
104 } else if let Some(f) = extract_function(child, src, imports_map) {
105 functions.push(f);
106 }
107 }
108 "declaration" => {
109 if has_function_declarator(child)
117 && let Some(f) = extract_function(child, src, imports_map)
118 {
119 functions.push(f);
120 }
121 }
122 "struct_specifier" | "class_specifier" => {
123 if let Some(c) = extract_class(child, src) {
124 classes.push(c);
125 }
126 }
127 "type_definition" => {
128 extract_typedef_struct(child, src, classes, type_aliases);
129 }
130 "preproc_include" => {
131 if let Some(imp) = extract_include(child, src) {
132 imports.push(imp);
133 }
134 }
135 _ => {
136 if child.child_count() > 0 {
137 collect_top_level(
138 child,
139 src,
140 imports_map,
141 functions,
142 classes,
143 imports,
144 type_aliases,
145 );
146 }
147 }
148 }
149 }
150}
151
152fn extract_typedef_struct(
153 node: Node,
154 src: &[u8],
155 classes: &mut Vec<ClassInfo>,
156 type_aliases: &mut Vec<(String, String)>,
157) {
158 let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
159 if !found_struct {
160 register_simple_typedef(node, src, type_aliases);
161 }
162}
163
164fn register_typedef_struct_children(
165 node: Node,
166 src: &[u8],
167 classes: &mut Vec<ClassInfo>,
168 type_aliases: &mut Vec<(String, String)>,
169) -> bool {
170 let mut found_struct = false;
171 let mut inner = node.walk();
172 for sub in node.children(&mut inner) {
173 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
174 continue;
175 }
176 found_struct = true;
177 register_single_typedef_struct(node, sub, src, classes, type_aliases);
178 }
179 found_struct
180}
181
182fn register_single_typedef_struct(
183 typedef: Node,
184 sub: Node,
185 src: &[u8],
186 classes: &mut Vec<ClassInfo>,
187 type_aliases: &mut Vec<(String, String)>,
188) {
189 let Some(mut c) = extract_class(sub, src) else {
190 return;
191 };
192 let original_name = c.name.clone();
193 if c.name.is_empty()
194 && let Some(decl) = typedef.child_by_field_name("declarator")
195 {
196 c.name = node_text(decl, src).to_string();
197 }
198 if !original_name.is_empty()
199 && let Some(decl) = typedef.child_by_field_name("declarator")
200 {
201 let alias = node_text(decl, src).to_string();
202 if alias != original_name {
203 type_aliases.push((alias, original_name));
204 }
205 }
206 if !c.name.is_empty() {
207 classes.push(c);
208 }
209}
210
211fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
213 let alias = extract_typedef_alias(node, src);
214 let original = node
215 .child_by_field_name("type")
216 .map(|t| node_text(t, src).trim().to_string())
217 .unwrap_or_default();
218 if !alias.is_empty() && alias != original {
219 type_aliases.push((alias, original));
220 }
221}
222
223fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
228 if let Some(decl) = node.child_by_field_name("declarator") {
229 return node_text(decl, src).trim().to_string();
230 }
231 let mut cursor = node.walk();
232 for child in node.children(&mut cursor) {
233 if child.kind() == "type_identifier" {
234 return node_text(child, src).trim().to_string();
235 }
236 }
237 String::new()
238}
239
240fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
243 let mut has_class_spec = false;
244 let mut cursor = node.walk();
245 for child in node.children(&mut cursor) {
246 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
247 has_class_spec = true;
248 }
249 }
250 if !has_class_spec {
251 return None;
252 }
253 let name_node = node
255 .child_by_field_name("declarator")
256 .filter(|d| d.kind() == "identifier")?;
257 let name = node_text(name_node, src).to_string();
258 let name_col = name_node.start_position().column;
259 let name_end_col = name_node.end_position().column;
260 let body = node.child_by_field_name("body")?;
261 let start_line = node.start_position().row + 1;
262 let end_line = node.end_position().row + 1;
263 let method_count = count_methods(body);
264 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
265
266 let parent_name = first_field_type;
268
269 Some(ClassInfo {
270 name,
271 start_line,
272 end_line,
273 name_col,
274 name_end_col,
275 line_count: end_line - start_line + 1,
276 method_count,
277 is_exported: true,
278 delegating_method_count: 0,
279 field_count: field_names.len(),
280 field_names,
281 field_types,
282 has_behavior: method_count > 0,
283 is_interface: false,
284 parent_name,
285 override_count: 0,
286 self_call_count: 0,
287 has_listener_field: false,
288 has_notify_method: false,
289 })
290}
291
292fn extract_function(
293 node: Node,
294 src: &[u8],
295 imports_map: &crate::type_ref::ImportsMap,
296) -> Option<FunctionInfo> {
297 let declarator = node.child_by_field_name("declarator")?;
298 let name_node = find_func_name_node(declarator)?;
299 let name = node_text(name_node, src).to_string();
300 let name_col = name_node.start_position().column;
301 let name_end_col = name_node.end_position().column;
302 let start_line = node.start_position().row + 1;
303 let end_line = node.end_position().row + 1;
304 let body = node.child_by_field_name("body");
305 let (param_count, param_types) = extract_params(declarator, src, imports_map);
306 let is_static = has_storage_class(node, src, "static");
307
308 Some(FunctionInfo {
309 name,
310 start_line,
311 end_line,
312 name_col,
313 name_end_col,
314 line_count: end_line - start_line + 1,
315 complexity: count_complexity(node),
316 body_hash: body.map(hash_ast),
317 is_exported: !is_static,
318 parameter_count: param_count,
319 parameter_types: param_types,
320 chain_depth: body.map(max_chain_depth).unwrap_or(0),
321 switch_arms: body.map(count_case_labels).unwrap_or(0),
322 external_refs: body
323 .map(|b| collect_external_refs_c(b, src))
324 .unwrap_or_default(),
325 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
326 comment_lines: count_comment_lines(node, src),
327 referenced_fields: body
328 .map(|b| collect_field_refs_c(b, src))
329 .unwrap_or_default(),
330 null_check_fields: body
331 .map(|b| collect_null_checks_c(b, src))
332 .unwrap_or_default(),
333 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
334 optional_param_count: 0,
335 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
336 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
337 return_type: extract_c_return_type(node, src, imports_map),
338 })
339}
340
341fn extract_c_return_type(
346 node: Node,
347 src: &[u8],
348 imports_map: &crate::type_ref::ImportsMap,
349) -> Option<cha_core::TypeRef> {
350 let ty = node.child_by_field_name("type")?;
351 let base = node_text(ty, src).trim().to_string();
352 let is_ptr = node
353 .child_by_field_name("declarator")
354 .is_some_and(|d| d.kind() == "pointer_declarator");
355 let raw = if is_ptr { format!("{base} *") } else { base };
356 Some(crate::type_ref::resolve(raw, imports_map))
357}
358
359fn has_function_declarator(node: Node) -> bool {
363 node.child_by_field_name("declarator")
364 .is_some_and(has_function_declarator_inside)
365}
366
367fn has_function_declarator_inside(node: Node) -> bool {
368 if node.kind() == "function_declarator" {
369 return true;
370 }
371 if let Some(inner) = node.child_by_field_name("declarator") {
374 return has_function_declarator_inside(inner);
375 }
376 false
377}
378
379fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
381 for i in 0..node.child_count() {
382 if let Some(child) = node.child(i)
383 && child.kind() == "storage_class_specifier"
384 && node_text(child, src) == keyword
385 {
386 return true;
387 }
388 }
389 false
390}
391
392fn find_func_name_node(declarator: Node) -> Option<Node> {
393 if declarator.kind() == "identifier" {
394 return Some(declarator);
395 }
396 declarator
397 .child_by_field_name("declarator")
398 .and_then(find_func_name_node)
399}
400
401fn extract_params(
402 declarator: Node,
403 src: &[u8],
404 imports_map: &crate::type_ref::ImportsMap,
405) -> (usize, Vec<cha_core::TypeRef>) {
406 let params = match declarator.child_by_field_name("parameters") {
407 Some(p) => p,
408 None => return (0, vec![]),
409 };
410 let mut count = 0;
411 let mut types = Vec::new();
412 let mut cursor = params.walk();
413 for child in params.children(&mut cursor) {
414 if child.kind() == "parameter_declaration" {
415 count += 1;
416 let base = child
417 .child_by_field_name("type")
418 .map(|t| node_text(t, src).to_string())
419 .unwrap_or_else(|| "int".into());
420 let is_ptr = child
421 .child_by_field_name("declarator")
422 .is_some_and(|d| d.kind() == "pointer_declarator");
423 let raw = if is_ptr { format!("{base} *") } else { base };
424 types.push(crate::type_ref::resolve(raw, imports_map));
425 }
426 }
427 (count, types)
428}
429
430fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
431 let name_node = node.child_by_field_name("name");
432 let name = name_node
433 .map(|n| node_text(n, src).to_string())
434 .unwrap_or_default();
435 let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
436 let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
437 let start_line = node.start_position().row + 1;
438 let end_line = node.end_position().row + 1;
439 let body = node.child_by_field_name("body");
440 let method_count = body.map(count_methods).unwrap_or(0);
441 let (field_names, field_types, first_field_type) =
442 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
443
444 Some(ClassInfo {
445 name,
446 start_line,
447 end_line,
448 name_col,
449 name_end_col,
450 line_count: end_line - start_line + 1,
451 method_count,
452 is_exported: true,
453 delegating_method_count: 0,
454 field_count: field_names.len(),
455 field_names,
456 field_types,
457 has_behavior: method_count > 0,
458 is_interface: false,
459 parent_name: first_field_type,
462 override_count: 0,
463 self_call_count: 0,
464 has_listener_field: false,
465 has_notify_method: false,
466 })
467}
468
469fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
470 let mut names = Vec::new();
471 let mut types = Vec::new();
472 let mut first_type = None;
473 let mut cursor = body.walk();
474 for child in body.children(&mut cursor) {
475 if child.kind() == "field_declaration" {
476 if let Some(decl) = child.child_by_field_name("declarator") {
477 names.push(node_text(decl, src).to_string());
478 }
479 let ty = child
480 .child_by_field_name("type")
481 .map(|t| node_text(t, src).to_string());
482 if first_type.is_none() {
483 first_type = ty.clone();
484 }
485 types.push(ty.unwrap_or_default());
486 }
487 }
488 (names, types, first_type)
489}
490
491fn count_methods(body: Node) -> usize {
492 let mut count = 0;
493 let mut cursor = body.walk();
494 for child in body.children(&mut cursor) {
495 if child.kind() == "function_definition" || child.kind() == "declaration" {
496 count += 1;
497 }
498 }
499 count
500}
501
502fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
503 let path = node.child_by_field_name("path")?;
504 let text = node_text(path, src)
505 .trim_matches(|c| c == '"' || c == '<' || c == '>')
506 .to_string();
507 Some(ImportInfo {
508 source: text,
509 line: node.start_position().row + 1,
510 col: node.start_position().column,
511 ..Default::default()
512 })
513}
514
515fn count_complexity(node: Node) -> usize {
516 let mut c = 1usize;
517 let mut cursor = node.walk();
518 visit_all(node, &mut cursor, &mut |n| match n.kind() {
519 "if_statement"
520 | "for_statement"
521 | "while_statement"
522 | "do_statement"
523 | "case_statement"
524 | "catch_clause"
525 | "conditional_expression" => c += 1,
526 "binary_expression" => {
527 if let Some(op) = n.child_by_field_name("operator") {
528 let kind = op.kind();
529 if kind == "&&" || kind == "||" {
530 c += 1;
531 }
532 }
533 }
534 _ => {}
535 });
536 c
537}
538
539fn max_chain_depth(node: Node) -> usize {
540 let mut max = 0;
541 let mut cursor = node.walk();
542 visit_all(node, &mut cursor, &mut |n| {
543 if n.kind() == "field_expression" {
544 let d = chain_len(n);
545 if d > max {
546 max = d;
547 }
548 }
549 });
550 max
551}
552
553fn chain_len(node: Node) -> usize {
554 let mut depth = 0;
555 let mut current = node;
556 while current.kind() == "field_expression" || current.kind() == "call_expression" {
557 if current.kind() == "field_expression" {
558 depth += 1;
559 }
560 match current.child(0) {
561 Some(c) => current = c,
562 None => break,
563 }
564 }
565 depth
566}
567
568fn count_case_labels(node: Node) -> usize {
569 let mut count = 0;
570 let mut cursor = node.walk();
571 visit_all(node, &mut cursor, &mut |n| {
572 if n.kind() == "case_statement" {
573 count += 1;
574 }
575 });
576 count
577}
578
579fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
580 let mut score = 0;
581 cc_walk_c(node, 0, &mut score);
582 score
583}
584
585fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
586 match node.kind() {
587 "if_statement" => {
588 *score += 1 + nesting;
589 cc_children_c(node, nesting + 1, score);
590 return;
591 }
592 "for_statement" | "while_statement" | "do_statement" => {
593 *score += 1 + nesting;
594 cc_children_c(node, nesting + 1, score);
595 return;
596 }
597 "switch_statement" => {
598 *score += 1 + nesting;
599 cc_children_c(node, nesting + 1, score);
600 return;
601 }
602 "else_clause" => {
603 *score += 1;
604 }
605 "binary_expression" => {
606 if let Some(op) = node.child_by_field_name("operator")
607 && (op.kind() == "&&" || op.kind() == "||")
608 {
609 *score += 1;
610 }
611 }
612 "catch_clause" => {
613 *score += 1 + nesting;
614 cc_children_c(node, nesting + 1, score);
615 return;
616 }
617 _ => {}
618 }
619 cc_children_c(node, nesting, score);
620}
621
622fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
623 let mut cursor = node.walk();
624 for child in node.children(&mut cursor) {
625 cc_walk_c(child, nesting, score);
626 }
627}
628
629fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
630 let mut refs = Vec::new();
631 let mut cursor = body.walk();
632 visit_all(body, &mut cursor, &mut |n| {
633 if n.kind() == "field_expression"
634 && let Some(obj) = n.child(0)
635 && obj.kind() == "identifier"
636 {
637 let name = node_text(obj, src).to_string();
638 if !refs.contains(&name) {
639 refs.push(name);
640 }
641 }
642 });
643 refs
644}
645
646fn check_delegating_c(body: Node, src: &[u8]) -> bool {
647 let mut cursor = body.walk();
648 let stmts: Vec<Node> = body
649 .children(&mut cursor)
650 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
651 .collect();
652 if stmts.len() != 1 {
653 return false;
654 }
655 let stmt = stmts[0];
656 let call = match stmt.kind() {
657 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
658 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
659 _ => None,
660 };
661 call.and_then(|c| c.child(0))
662 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
663}
664
665fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
666 let mut refs = Vec::new();
667 let mut cursor = body.walk();
668 visit_all(body, &mut cursor, &mut |n| {
669 if n.kind() == "field_expression"
670 && let Some(field) = n.child_by_field_name("field")
671 {
672 let name = node_text(field, src).to_string();
673 if !refs.contains(&name) {
674 refs.push(name);
675 }
676 }
677 });
678 refs
679}
680
681fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
682 let mut fields = Vec::new();
683 let mut cursor = body.walk();
684 visit_all(body, &mut cursor, &mut |n| {
685 if n.kind() == "binary_expression" {
686 let text = node_text(n, src);
687 if (text.contains("NULL") || text.contains("nullptr"))
688 && let Some(left) = n.child(0)
689 {
690 let name = node_text(left, src).to_string();
691 if !fields.contains(&name) {
692 fields.push(name);
693 }
694 }
695 }
696 });
697 fields
698}
699
700fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
701 let mut cursor = body.walk();
702 let mut target = None;
703 visit_all(body, &mut cursor, &mut |n| {
704 if n.kind() == "switch_statement"
705 && target.is_none()
706 && let Some(cond) = n.child_by_field_name("condition")
707 {
708 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
709 }
710 });
711 target
712}
713
714fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
715 let mut calls = Vec::new();
716 let mut cursor = body.walk();
717 visit_all(body, &mut cursor, &mut |n| {
718 if n.kind() == "call_expression"
719 && let Some(func) = n.child(0)
720 {
721 let name = node_text(func, src).to_string();
722 if !calls.contains(&name) {
723 calls.push(name);
724 }
725 }
726 });
727 calls
728}
729
730fn count_comment_lines(node: Node, src: &[u8]) -> usize {
731 let mut count = 0;
732 let mut cursor = node.walk();
733 visit_all(node, &mut cursor, &mut |n| {
734 if n.kind() == "comment" {
735 count += node_text(n, src).lines().count();
736 }
737 });
738 count
739}
740
741fn hash_ast(node: Node) -> u64 {
742 let mut hasher = DefaultHasher::new();
743 hash_node(node, &mut hasher);
744 hasher.finish()
745}
746
747fn hash_node(node: Node, hasher: &mut DefaultHasher) {
748 node.kind().hash(hasher);
749 let mut cursor = node.walk();
750 for child in node.children(&mut cursor) {
751 hash_node(child, hasher);
752 }
753}
754
755fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
756 node.utf8_text(src).unwrap_or("")
757}
758
759fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
760 let mut comments = Vec::new();
761 let mut cursor = root.walk();
762 visit_all(root, &mut cursor, &mut |n| {
763 if n.kind().contains("comment") {
764 comments.push(cha_core::CommentInfo {
765 text: node_text(n, src).to_string(),
766 line: n.start_position().row + 1,
767 });
768 }
769 });
770 comments
771}
772
773fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
774 f(node);
775 if cursor.goto_first_child() {
776 loop {
777 let child_node = cursor.node();
778 let mut child_cursor = child_node.walk();
779 visit_all(child_node, &mut child_cursor, f);
780 if !cursor.goto_next_sibling() {
781 break;
782 }
783 }
784 cursor.goto_parent();
785 }
786}