1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Unit type whose [`LanguageParser`] implementation handles C sources
/// (its `language_name` is `"c"`).
pub struct CParser;
/// Unit type whose [`LanguageParser`] implementation handles C++ sources
/// (its `language_name` is `"cpp"`).
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 let imports_map = crate::c_imports::build(root, src);
47 collect_top_level(
48 root,
49 src,
50 &imports_map,
51 &mut functions,
52 &mut classes,
53 &mut imports,
54 &mut type_aliases,
55 );
56
57 associate_methods(&functions, &mut classes);
61
62 if is_header_file(file) {
63 for f in &mut functions {
64 f.is_exported = true;
65 }
66 }
67
68 Some(SourceModel {
69 language: lang.into(),
70 total_lines: file.line_count(),
71 functions,
72 classes,
73 imports,
74 comments: collect_comments(root, src),
75 type_aliases,
76 })
77}
78
79fn is_header_file(file: &SourceFile) -> bool {
80 file.path
81 .extension()
82 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
83}
84
85fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
88 for class in classes.iter_mut() {
89 let count = functions
90 .iter()
91 .filter(|f| {
92 f.parameter_types.first().is_some_and(|t| {
93 t.raw.contains('*')
94 && t.raw.split('*').next().unwrap_or("").trim() == class.name
95 })
96 })
97 .count();
98 if count > 0 {
99 class.method_count += count;
100 class.has_behavior = true;
101 }
102 }
103}
104
105fn collect_top_level(
107 root: Node,
108 src: &[u8],
109 imports_map: &crate::type_ref::ImportsMap,
110 functions: &mut Vec<FunctionInfo>,
111 classes: &mut Vec<ClassInfo>,
112 imports: &mut Vec<ImportInfo>,
113 type_aliases: &mut Vec<(String, String)>,
114) {
115 let mut cursor = root.walk();
116 for child in root.children(&mut cursor) {
117 match child.kind() {
118 "function_definition" => {
119 if let Some(c) = try_extract_macro_class(child, src) {
123 classes.push(c);
124 } else if let Some(f) = extract_function(child, src, imports_map) {
125 functions.push(f);
126 }
127 }
128 "struct_specifier" | "class_specifier" => {
129 if let Some(c) = extract_class(child, src) {
130 classes.push(c);
131 }
132 }
133 "type_definition" => {
134 extract_typedef_struct(child, src, classes, type_aliases);
135 }
136 "preproc_include" => {
137 if let Some(imp) = extract_include(child, src) {
138 imports.push(imp);
139 }
140 }
141 _ => {
142 if child.child_count() > 0 {
143 collect_top_level(
144 child,
145 src,
146 imports_map,
147 functions,
148 classes,
149 imports,
150 type_aliases,
151 );
152 }
153 }
154 }
155 }
156}
157
158fn extract_typedef_struct(
159 node: Node,
160 src: &[u8],
161 classes: &mut Vec<ClassInfo>,
162 type_aliases: &mut Vec<(String, String)>,
163) {
164 let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
165 if !found_struct {
166 register_simple_typedef(node, src, type_aliases);
167 }
168}
169
170fn register_typedef_struct_children(
171 node: Node,
172 src: &[u8],
173 classes: &mut Vec<ClassInfo>,
174 type_aliases: &mut Vec<(String, String)>,
175) -> bool {
176 let mut found_struct = false;
177 let mut inner = node.walk();
178 for sub in node.children(&mut inner) {
179 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
180 continue;
181 }
182 found_struct = true;
183 register_single_typedef_struct(node, sub, src, classes, type_aliases);
184 }
185 found_struct
186}
187
188fn register_single_typedef_struct(
189 typedef: Node,
190 sub: Node,
191 src: &[u8],
192 classes: &mut Vec<ClassInfo>,
193 type_aliases: &mut Vec<(String, String)>,
194) {
195 let Some(mut c) = extract_class(sub, src) else {
196 return;
197 };
198 let original_name = c.name.clone();
199 if c.name.is_empty()
200 && let Some(decl) = typedef.child_by_field_name("declarator")
201 {
202 c.name = node_text(decl, src).to_string();
203 }
204 if !original_name.is_empty()
205 && let Some(decl) = typedef.child_by_field_name("declarator")
206 {
207 let alias = node_text(decl, src).to_string();
208 if alias != original_name {
209 type_aliases.push((alias, original_name));
210 }
211 }
212 if !c.name.is_empty() {
213 classes.push(c);
214 }
215}
216
217fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
219 let alias = extract_typedef_alias(node, src);
220 let original = node
221 .child_by_field_name("type")
222 .map(|t| node_text(t, src).trim().to_string())
223 .unwrap_or_default();
224 if !alias.is_empty() && alias != original {
225 type_aliases.push((alias, original));
226 }
227}
228
229fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
234 if let Some(decl) = node.child_by_field_name("declarator") {
235 return node_text(decl, src).trim().to_string();
236 }
237 let mut cursor = node.walk();
238 for child in node.children(&mut cursor) {
239 if child.kind() == "type_identifier" {
240 return node_text(child, src).trim().to_string();
241 }
242 }
243 String::new()
244}
245
246fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
249 let mut has_class_spec = false;
250 let mut cursor = node.walk();
251 for child in node.children(&mut cursor) {
252 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
253 has_class_spec = true;
254 }
255 }
256 if !has_class_spec {
257 return None;
258 }
259 let name_node = node
261 .child_by_field_name("declarator")
262 .filter(|d| d.kind() == "identifier")?;
263 let name = node_text(name_node, src).to_string();
264 let name_col = name_node.start_position().column;
265 let name_end_col = name_node.end_position().column;
266 let body = node.child_by_field_name("body")?;
267 let start_line = node.start_position().row + 1;
268 let end_line = node.end_position().row + 1;
269 let method_count = count_methods(body);
270 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
271
272 let parent_name = first_field_type;
274
275 Some(ClassInfo {
276 name,
277 start_line,
278 end_line,
279 name_col,
280 name_end_col,
281 line_count: end_line - start_line + 1,
282 method_count,
283 is_exported: true,
284 delegating_method_count: 0,
285 field_count: field_names.len(),
286 field_names,
287 field_types,
288 has_behavior: method_count > 0,
289 is_interface: false,
290 parent_name,
291 override_count: 0,
292 self_call_count: 0,
293 has_listener_field: false,
294 has_notify_method: false,
295 })
296}
297
298fn extract_function(
299 node: Node,
300 src: &[u8],
301 imports_map: &crate::type_ref::ImportsMap,
302) -> Option<FunctionInfo> {
303 let declarator = node.child_by_field_name("declarator")?;
304 let name_node = find_func_name_node(declarator)?;
305 let name = node_text(name_node, src).to_string();
306 let name_col = name_node.start_position().column;
307 let name_end_col = name_node.end_position().column;
308 let start_line = node.start_position().row + 1;
309 let end_line = node.end_position().row + 1;
310 let body = node.child_by_field_name("body");
311 let (param_count, param_types) = extract_params(declarator, src, imports_map);
312 let is_static = has_storage_class(node, src, "static");
313
314 Some(FunctionInfo {
315 name,
316 start_line,
317 end_line,
318 name_col,
319 name_end_col,
320 line_count: end_line - start_line + 1,
321 complexity: count_complexity(node),
322 body_hash: body.map(hash_ast),
323 is_exported: !is_static,
324 parameter_count: param_count,
325 parameter_types: param_types,
326 chain_depth: body.map(max_chain_depth).unwrap_or(0),
327 switch_arms: body.map(count_case_labels).unwrap_or(0),
328 external_refs: body
329 .map(|b| collect_external_refs_c(b, src))
330 .unwrap_or_default(),
331 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
332 comment_lines: count_comment_lines(node, src),
333 referenced_fields: body
334 .map(|b| collect_field_refs_c(b, src))
335 .unwrap_or_default(),
336 null_check_fields: body
337 .map(|b| collect_null_checks_c(b, src))
338 .unwrap_or_default(),
339 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
340 optional_param_count: 0,
341 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
342 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
343 return_type: extract_c_return_type(node, src, imports_map),
344 })
345}
346
347fn extract_c_return_type(
352 node: Node,
353 src: &[u8],
354 imports_map: &crate::type_ref::ImportsMap,
355) -> Option<cha_core::TypeRef> {
356 let ty = node.child_by_field_name("type")?;
357 let base = node_text(ty, src).trim().to_string();
358 let is_ptr = node
359 .child_by_field_name("declarator")
360 .is_some_and(|d| d.kind() == "pointer_declarator");
361 let raw = if is_ptr { format!("{base} *") } else { base };
362 Some(crate::type_ref::resolve(raw, imports_map))
363}
364
365fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
367 for i in 0..node.child_count() {
368 if let Some(child) = node.child(i)
369 && child.kind() == "storage_class_specifier"
370 && node_text(child, src) == keyword
371 {
372 return true;
373 }
374 }
375 false
376}
377
378fn find_func_name_node(declarator: Node) -> Option<Node> {
379 if declarator.kind() == "identifier" {
380 return Some(declarator);
381 }
382 declarator
383 .child_by_field_name("declarator")
384 .and_then(find_func_name_node)
385}
386
387fn extract_params(
388 declarator: Node,
389 src: &[u8],
390 imports_map: &crate::type_ref::ImportsMap,
391) -> (usize, Vec<cha_core::TypeRef>) {
392 let params = match declarator.child_by_field_name("parameters") {
393 Some(p) => p,
394 None => return (0, vec![]),
395 };
396 let mut count = 0;
397 let mut types = Vec::new();
398 let mut cursor = params.walk();
399 for child in params.children(&mut cursor) {
400 if child.kind() == "parameter_declaration" {
401 count += 1;
402 let base = child
403 .child_by_field_name("type")
404 .map(|t| node_text(t, src).to_string())
405 .unwrap_or_else(|| "int".into());
406 let is_ptr = child
407 .child_by_field_name("declarator")
408 .is_some_and(|d| d.kind() == "pointer_declarator");
409 let raw = if is_ptr { format!("{base} *") } else { base };
410 types.push(crate::type_ref::resolve(raw, imports_map));
411 }
412 }
413 (count, types)
414}
415
416fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
417 let name_node = node.child_by_field_name("name");
418 let name = name_node
419 .map(|n| node_text(n, src).to_string())
420 .unwrap_or_default();
421 let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
422 let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
423 let start_line = node.start_position().row + 1;
424 let end_line = node.end_position().row + 1;
425 let body = node.child_by_field_name("body");
426 let method_count = body.map(count_methods).unwrap_or(0);
427 let (field_names, field_types, first_field_type) =
428 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
429
430 Some(ClassInfo {
431 name,
432 start_line,
433 end_line,
434 name_col,
435 name_end_col,
436 line_count: end_line - start_line + 1,
437 method_count,
438 is_exported: true,
439 delegating_method_count: 0,
440 field_count: field_names.len(),
441 field_names,
442 field_types,
443 has_behavior: method_count > 0,
444 is_interface: false,
445 parent_name: first_field_type,
448 override_count: 0,
449 self_call_count: 0,
450 has_listener_field: false,
451 has_notify_method: false,
452 })
453}
454
455fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
456 let mut names = Vec::new();
457 let mut types = Vec::new();
458 let mut first_type = None;
459 let mut cursor = body.walk();
460 for child in body.children(&mut cursor) {
461 if child.kind() == "field_declaration" {
462 if let Some(decl) = child.child_by_field_name("declarator") {
463 names.push(node_text(decl, src).to_string());
464 }
465 let ty = child
466 .child_by_field_name("type")
467 .map(|t| node_text(t, src).to_string());
468 if first_type.is_none() {
469 first_type = ty.clone();
470 }
471 types.push(ty.unwrap_or_default());
472 }
473 }
474 (names, types, first_type)
475}
476
477fn count_methods(body: Node) -> usize {
478 let mut count = 0;
479 let mut cursor = body.walk();
480 for child in body.children(&mut cursor) {
481 if child.kind() == "function_definition" || child.kind() == "declaration" {
482 count += 1;
483 }
484 }
485 count
486}
487
488fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
489 let path = node.child_by_field_name("path")?;
490 let text = node_text(path, src)
491 .trim_matches(|c| c == '"' || c == '<' || c == '>')
492 .to_string();
493 Some(ImportInfo {
494 source: text,
495 line: node.start_position().row + 1,
496 col: node.start_position().column,
497 ..Default::default()
498 })
499}
500
501fn count_complexity(node: Node) -> usize {
502 let mut c = 1usize;
503 let mut cursor = node.walk();
504 visit_all(node, &mut cursor, &mut |n| match n.kind() {
505 "if_statement"
506 | "for_statement"
507 | "while_statement"
508 | "do_statement"
509 | "case_statement"
510 | "catch_clause"
511 | "conditional_expression" => c += 1,
512 "binary_expression" => {
513 if let Some(op) = n.child_by_field_name("operator") {
514 let kind = op.kind();
515 if kind == "&&" || kind == "||" {
516 c += 1;
517 }
518 }
519 }
520 _ => {}
521 });
522 c
523}
524
525fn max_chain_depth(node: Node) -> usize {
526 let mut max = 0;
527 let mut cursor = node.walk();
528 visit_all(node, &mut cursor, &mut |n| {
529 if n.kind() == "field_expression" {
530 let d = chain_len(n);
531 if d > max {
532 max = d;
533 }
534 }
535 });
536 max
537}
538
539fn chain_len(node: Node) -> usize {
540 let mut depth = 0;
541 let mut current = node;
542 while current.kind() == "field_expression" || current.kind() == "call_expression" {
543 if current.kind() == "field_expression" {
544 depth += 1;
545 }
546 match current.child(0) {
547 Some(c) => current = c,
548 None => break,
549 }
550 }
551 depth
552}
553
554fn count_case_labels(node: Node) -> usize {
555 let mut count = 0;
556 let mut cursor = node.walk();
557 visit_all(node, &mut cursor, &mut |n| {
558 if n.kind() == "case_statement" {
559 count += 1;
560 }
561 });
562 count
563}
564
565fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
566 let mut score = 0;
567 cc_walk_c(node, 0, &mut score);
568 score
569}
570
571fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
572 match node.kind() {
573 "if_statement" => {
574 *score += 1 + nesting;
575 cc_children_c(node, nesting + 1, score);
576 return;
577 }
578 "for_statement" | "while_statement" | "do_statement" => {
579 *score += 1 + nesting;
580 cc_children_c(node, nesting + 1, score);
581 return;
582 }
583 "switch_statement" => {
584 *score += 1 + nesting;
585 cc_children_c(node, nesting + 1, score);
586 return;
587 }
588 "else_clause" => {
589 *score += 1;
590 }
591 "binary_expression" => {
592 if let Some(op) = node.child_by_field_name("operator")
593 && (op.kind() == "&&" || op.kind() == "||")
594 {
595 *score += 1;
596 }
597 }
598 "catch_clause" => {
599 *score += 1 + nesting;
600 cc_children_c(node, nesting + 1, score);
601 return;
602 }
603 _ => {}
604 }
605 cc_children_c(node, nesting, score);
606}
607
608fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
609 let mut cursor = node.walk();
610 for child in node.children(&mut cursor) {
611 cc_walk_c(child, nesting, score);
612 }
613}
614
615fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
616 let mut refs = Vec::new();
617 let mut cursor = body.walk();
618 visit_all(body, &mut cursor, &mut |n| {
619 if n.kind() == "field_expression"
620 && let Some(obj) = n.child(0)
621 && obj.kind() == "identifier"
622 {
623 let name = node_text(obj, src).to_string();
624 if !refs.contains(&name) {
625 refs.push(name);
626 }
627 }
628 });
629 refs
630}
631
632fn check_delegating_c(body: Node, src: &[u8]) -> bool {
633 let mut cursor = body.walk();
634 let stmts: Vec<Node> = body
635 .children(&mut cursor)
636 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
637 .collect();
638 if stmts.len() != 1 {
639 return false;
640 }
641 let stmt = stmts[0];
642 let call = match stmt.kind() {
643 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
644 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
645 _ => None,
646 };
647 call.and_then(|c| c.child(0))
648 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
649}
650
651fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
652 let mut refs = Vec::new();
653 let mut cursor = body.walk();
654 visit_all(body, &mut cursor, &mut |n| {
655 if n.kind() == "field_expression"
656 && let Some(field) = n.child_by_field_name("field")
657 {
658 let name = node_text(field, src).to_string();
659 if !refs.contains(&name) {
660 refs.push(name);
661 }
662 }
663 });
664 refs
665}
666
667fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
668 let mut fields = Vec::new();
669 let mut cursor = body.walk();
670 visit_all(body, &mut cursor, &mut |n| {
671 if n.kind() == "binary_expression" {
672 let text = node_text(n, src);
673 if (text.contains("NULL") || text.contains("nullptr"))
674 && let Some(left) = n.child(0)
675 {
676 let name = node_text(left, src).to_string();
677 if !fields.contains(&name) {
678 fields.push(name);
679 }
680 }
681 }
682 });
683 fields
684}
685
686fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
687 let mut cursor = body.walk();
688 let mut target = None;
689 visit_all(body, &mut cursor, &mut |n| {
690 if n.kind() == "switch_statement"
691 && target.is_none()
692 && let Some(cond) = n.child_by_field_name("condition")
693 {
694 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
695 }
696 });
697 target
698}
699
700fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
701 let mut calls = Vec::new();
702 let mut cursor = body.walk();
703 visit_all(body, &mut cursor, &mut |n| {
704 if n.kind() == "call_expression"
705 && let Some(func) = n.child(0)
706 {
707 let name = node_text(func, src).to_string();
708 if !calls.contains(&name) {
709 calls.push(name);
710 }
711 }
712 });
713 calls
714}
715
716fn count_comment_lines(node: Node, src: &[u8]) -> usize {
717 let mut count = 0;
718 let mut cursor = node.walk();
719 visit_all(node, &mut cursor, &mut |n| {
720 if n.kind() == "comment" {
721 count += node_text(n, src).lines().count();
722 }
723 });
724 count
725}
726
727fn hash_ast(node: Node) -> u64 {
728 let mut hasher = DefaultHasher::new();
729 hash_node(node, &mut hasher);
730 hasher.finish()
731}
732
733fn hash_node(node: Node, hasher: &mut DefaultHasher) {
734 node.kind().hash(hasher);
735 let mut cursor = node.walk();
736 for child in node.children(&mut cursor) {
737 hash_node(child, hasher);
738 }
739}
740
741fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
742 node.utf8_text(src).unwrap_or("")
743}
744
745fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
746 let mut comments = Vec::new();
747 let mut cursor = root.walk();
748 visit_all(root, &mut cursor, &mut |n| {
749 if n.kind().contains("comment") {
750 comments.push(cha_core::CommentInfo {
751 text: node_text(n, src).to_string(),
752 line: n.start_position().row + 1,
753 });
754 }
755 });
756 comments
757}
758
759fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
760 f(node);
761 if cursor.goto_first_child() {
762 loop {
763 let child_node = cursor.node();
764 let mut child_cursor = child_node.walk();
765 visit_all(child_node, &mut child_cursor, f);
766 if !cursor.goto_next_sibling() {
767 break;
768 }
769 }
770 cursor.goto_parent();
771 }
772}