1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless parser for C sources; its `LanguageParser` implementation uses
/// the tree-sitter C grammar.
///
/// The type carries no data, so it is cheap to copy and can be built with
/// `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CParser;
/// Stateless parser for C++ sources; its `LanguageParser` implementation uses
/// the tree-sitter C++ grammar.
///
/// The type carries no data, so it is cheap to copy and can be built with
/// `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 let imports_map = crate::c_imports::build(root, src);
47 collect_top_level(
48 root,
49 src,
50 &imports_map,
51 &mut functions,
52 &mut classes,
53 &mut imports,
54 &mut type_aliases,
55 );
56
57 associate_methods(&functions, &mut classes);
61
62 if is_header_file(file) {
63 for f in &mut functions {
64 f.is_exported = true;
65 }
66 }
67
68 Some(SourceModel {
69 language: lang.into(),
70 total_lines: file.line_count(),
71 functions,
72 classes,
73 imports,
74 comments: collect_comments(root, src),
75 type_aliases,
76 })
77}
78
79fn is_header_file(file: &SourceFile) -> bool {
80 file.path
81 .extension()
82 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
83}
84
85fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
88 for class in classes.iter_mut() {
89 let count = functions
90 .iter()
91 .filter(|f| {
92 f.parameter_types.first().is_some_and(|t| {
93 t.raw.contains('*')
94 && t.raw.split('*').next().unwrap_or("").trim() == class.name
95 })
96 })
97 .count();
98 if count > 0 {
99 class.method_count += count;
100 class.has_behavior = true;
101 }
102 }
103}
104
105fn collect_top_level(
107 root: Node,
108 src: &[u8],
109 imports_map: &crate::type_ref::ImportsMap,
110 functions: &mut Vec<FunctionInfo>,
111 classes: &mut Vec<ClassInfo>,
112 imports: &mut Vec<ImportInfo>,
113 type_aliases: &mut Vec<(String, String)>,
114) {
115 let mut cursor = root.walk();
116 for child in root.children(&mut cursor) {
117 match child.kind() {
118 "function_definition" => {
119 if let Some(c) = try_extract_macro_class(child, src) {
123 classes.push(c);
124 } else if let Some(f) = extract_function(child, src, imports_map) {
125 functions.push(f);
126 }
127 }
128 "struct_specifier" | "class_specifier" => {
129 if let Some(c) = extract_class(child, src) {
130 classes.push(c);
131 }
132 }
133 "type_definition" => {
134 extract_typedef_struct(child, src, classes, type_aliases);
135 }
136 "preproc_include" => {
137 if let Some(imp) = extract_include(child, src) {
138 imports.push(imp);
139 }
140 }
141 _ => {
142 if child.child_count() > 0 {
143 collect_top_level(
144 child,
145 src,
146 imports_map,
147 functions,
148 classes,
149 imports,
150 type_aliases,
151 );
152 }
153 }
154 }
155 }
156}
157
158fn extract_typedef_struct(
159 node: Node,
160 src: &[u8],
161 classes: &mut Vec<ClassInfo>,
162 type_aliases: &mut Vec<(String, String)>,
163) {
164 let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
165 if !found_struct {
166 register_simple_typedef(node, src, type_aliases);
167 }
168}
169
170fn register_typedef_struct_children(
171 node: Node,
172 src: &[u8],
173 classes: &mut Vec<ClassInfo>,
174 type_aliases: &mut Vec<(String, String)>,
175) -> bool {
176 let mut found_struct = false;
177 let mut inner = node.walk();
178 for sub in node.children(&mut inner) {
179 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
180 continue;
181 }
182 found_struct = true;
183 register_single_typedef_struct(node, sub, src, classes, type_aliases);
184 }
185 found_struct
186}
187
188fn register_single_typedef_struct(
189 typedef: Node,
190 sub: Node,
191 src: &[u8],
192 classes: &mut Vec<ClassInfo>,
193 type_aliases: &mut Vec<(String, String)>,
194) {
195 let Some(mut c) = extract_class(sub, src) else {
196 return;
197 };
198 let original_name = c.name.clone();
199 if c.name.is_empty()
200 && let Some(decl) = typedef.child_by_field_name("declarator")
201 {
202 c.name = node_text(decl, src).to_string();
203 }
204 if !original_name.is_empty()
205 && let Some(decl) = typedef.child_by_field_name("declarator")
206 {
207 let alias = node_text(decl, src).to_string();
208 if alias != original_name {
209 type_aliases.push((alias, original_name));
210 }
211 }
212 if !c.name.is_empty() {
213 classes.push(c);
214 }
215}
216
217fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
219 let alias = extract_typedef_alias(node, src);
220 let original = node
221 .child_by_field_name("type")
222 .map(|t| node_text(t, src).trim().to_string())
223 .unwrap_or_default();
224 if !alias.is_empty() && alias != original {
225 type_aliases.push((alias, original));
226 }
227}
228
229fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
234 if let Some(decl) = node.child_by_field_name("declarator") {
235 return node_text(decl, src).trim().to_string();
236 }
237 let mut cursor = node.walk();
238 for child in node.children(&mut cursor) {
239 if child.kind() == "type_identifier" {
240 return node_text(child, src).trim().to_string();
241 }
242 }
243 String::new()
244}
245
246fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
249 let mut has_class_spec = false;
250 let mut cursor = node.walk();
251 for child in node.children(&mut cursor) {
252 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
253 has_class_spec = true;
254 }
255 }
256 if !has_class_spec {
257 return None;
258 }
259 let name_node = node
261 .child_by_field_name("declarator")
262 .filter(|d| d.kind() == "identifier")?;
263 let name = node_text(name_node, src).to_string();
264 let name_col = name_node.start_position().column;
265 let name_end_col = name_node.end_position().column;
266 let body = node.child_by_field_name("body")?;
267 let start_line = node.start_position().row + 1;
268 let end_line = node.end_position().row + 1;
269 let method_count = count_methods(body);
270 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
271
272 let parent_name = first_field_type;
274
275 Some(ClassInfo {
276 name,
277 start_line,
278 end_line,
279 name_col,
280 name_end_col,
281 line_count: end_line - start_line + 1,
282 method_count,
283 is_exported: true,
284 delegating_method_count: 0,
285 field_count: field_names.len(),
286 field_names,
287 field_types,
288 has_behavior: method_count > 0,
289 is_interface: false,
290 parent_name,
291 override_count: 0,
292 self_call_count: 0,
293 has_listener_field: false,
294 has_notify_method: false,
295 })
296}
297
298fn extract_function(
299 node: Node,
300 src: &[u8],
301 imports_map: &crate::type_ref::ImportsMap,
302) -> Option<FunctionInfo> {
303 let declarator = node.child_by_field_name("declarator")?;
304 let name_node = find_func_name_node(declarator)?;
305 let name = node_text(name_node, src).to_string();
306 let name_col = name_node.start_position().column;
307 let name_end_col = name_node.end_position().column;
308 let start_line = node.start_position().row + 1;
309 let end_line = node.end_position().row + 1;
310 let body = node.child_by_field_name("body");
311 let (param_count, param_types) = extract_params(declarator, src, imports_map);
312 let is_static = has_storage_class(node, src, "static");
313
314 Some(FunctionInfo {
315 name,
316 start_line,
317 end_line,
318 name_col,
319 name_end_col,
320 line_count: end_line - start_line + 1,
321 complexity: count_complexity(node),
322 body_hash: body.map(hash_ast),
323 is_exported: !is_static,
324 parameter_count: param_count,
325 parameter_types: param_types,
326 chain_depth: body.map(max_chain_depth).unwrap_or(0),
327 switch_arms: body.map(count_case_labels).unwrap_or(0),
328 external_refs: body
329 .map(|b| collect_external_refs_c(b, src))
330 .unwrap_or_default(),
331 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
332 comment_lines: count_comment_lines(node, src),
333 referenced_fields: body
334 .map(|b| collect_field_refs_c(b, src))
335 .unwrap_or_default(),
336 null_check_fields: body
337 .map(|b| collect_null_checks_c(b, src))
338 .unwrap_or_default(),
339 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
340 optional_param_count: 0,
341 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
342 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
343 })
344}
345
346fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
348 for i in 0..node.child_count() {
349 if let Some(child) = node.child(i)
350 && child.kind() == "storage_class_specifier"
351 && node_text(child, src) == keyword
352 {
353 return true;
354 }
355 }
356 false
357}
358
359fn find_func_name_node(declarator: Node) -> Option<Node> {
360 if declarator.kind() == "identifier" {
361 return Some(declarator);
362 }
363 declarator
364 .child_by_field_name("declarator")
365 .and_then(find_func_name_node)
366}
367
368fn extract_params(
369 declarator: Node,
370 src: &[u8],
371 imports_map: &crate::type_ref::ImportsMap,
372) -> (usize, Vec<cha_core::TypeRef>) {
373 let params = match declarator.child_by_field_name("parameters") {
374 Some(p) => p,
375 None => return (0, vec![]),
376 };
377 let mut count = 0;
378 let mut types = Vec::new();
379 let mut cursor = params.walk();
380 for child in params.children(&mut cursor) {
381 if child.kind() == "parameter_declaration" {
382 count += 1;
383 let base = child
384 .child_by_field_name("type")
385 .map(|t| node_text(t, src).to_string())
386 .unwrap_or_else(|| "int".into());
387 let is_ptr = child
388 .child_by_field_name("declarator")
389 .is_some_and(|d| d.kind() == "pointer_declarator");
390 let raw = if is_ptr { format!("{base} *") } else { base };
391 types.push(crate::type_ref::resolve(raw, imports_map));
392 }
393 }
394 (count, types)
395}
396
397fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
398 let name_node = node.child_by_field_name("name");
399 let name = name_node
400 .map(|n| node_text(n, src).to_string())
401 .unwrap_or_default();
402 let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
403 let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
404 let start_line = node.start_position().row + 1;
405 let end_line = node.end_position().row + 1;
406 let body = node.child_by_field_name("body");
407 let method_count = body.map(count_methods).unwrap_or(0);
408 let (field_names, field_types, first_field_type) =
409 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
410
411 Some(ClassInfo {
412 name,
413 start_line,
414 end_line,
415 name_col,
416 name_end_col,
417 line_count: end_line - start_line + 1,
418 method_count,
419 is_exported: true,
420 delegating_method_count: 0,
421 field_count: field_names.len(),
422 field_names,
423 field_types,
424 has_behavior: method_count > 0,
425 is_interface: false,
426 parent_name: first_field_type,
429 override_count: 0,
430 self_call_count: 0,
431 has_listener_field: false,
432 has_notify_method: false,
433 })
434}
435
436fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
437 let mut names = Vec::new();
438 let mut types = Vec::new();
439 let mut first_type = None;
440 let mut cursor = body.walk();
441 for child in body.children(&mut cursor) {
442 if child.kind() == "field_declaration" {
443 if let Some(decl) = child.child_by_field_name("declarator") {
444 names.push(node_text(decl, src).to_string());
445 }
446 let ty = child
447 .child_by_field_name("type")
448 .map(|t| node_text(t, src).to_string());
449 if first_type.is_none() {
450 first_type = ty.clone();
451 }
452 types.push(ty.unwrap_or_default());
453 }
454 }
455 (names, types, first_type)
456}
457
458fn count_methods(body: Node) -> usize {
459 let mut count = 0;
460 let mut cursor = body.walk();
461 for child in body.children(&mut cursor) {
462 if child.kind() == "function_definition" || child.kind() == "declaration" {
463 count += 1;
464 }
465 }
466 count
467}
468
469fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
470 let path = node.child_by_field_name("path")?;
471 let text = node_text(path, src)
472 .trim_matches(|c| c == '"' || c == '<' || c == '>')
473 .to_string();
474 Some(ImportInfo {
475 source: text,
476 line: node.start_position().row + 1,
477 col: node.start_position().column,
478 ..Default::default()
479 })
480}
481
482fn count_complexity(node: Node) -> usize {
483 let mut c = 1usize;
484 let mut cursor = node.walk();
485 visit_all(node, &mut cursor, &mut |n| match n.kind() {
486 "if_statement"
487 | "for_statement"
488 | "while_statement"
489 | "do_statement"
490 | "case_statement"
491 | "catch_clause"
492 | "conditional_expression" => c += 1,
493 "binary_expression" => {
494 if let Some(op) = n.child_by_field_name("operator") {
495 let kind = op.kind();
496 if kind == "&&" || kind == "||" {
497 c += 1;
498 }
499 }
500 }
501 _ => {}
502 });
503 c
504}
505
506fn max_chain_depth(node: Node) -> usize {
507 let mut max = 0;
508 let mut cursor = node.walk();
509 visit_all(node, &mut cursor, &mut |n| {
510 if n.kind() == "field_expression" {
511 let d = chain_len(n);
512 if d > max {
513 max = d;
514 }
515 }
516 });
517 max
518}
519
520fn chain_len(node: Node) -> usize {
521 let mut depth = 0;
522 let mut current = node;
523 while current.kind() == "field_expression" || current.kind() == "call_expression" {
524 if current.kind() == "field_expression" {
525 depth += 1;
526 }
527 match current.child(0) {
528 Some(c) => current = c,
529 None => break,
530 }
531 }
532 depth
533}
534
535fn count_case_labels(node: Node) -> usize {
536 let mut count = 0;
537 let mut cursor = node.walk();
538 visit_all(node, &mut cursor, &mut |n| {
539 if n.kind() == "case_statement" {
540 count += 1;
541 }
542 });
543 count
544}
545
546fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
547 let mut score = 0;
548 cc_walk_c(node, 0, &mut score);
549 score
550}
551
552fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
553 match node.kind() {
554 "if_statement" => {
555 *score += 1 + nesting;
556 cc_children_c(node, nesting + 1, score);
557 return;
558 }
559 "for_statement" | "while_statement" | "do_statement" => {
560 *score += 1 + nesting;
561 cc_children_c(node, nesting + 1, score);
562 return;
563 }
564 "switch_statement" => {
565 *score += 1 + nesting;
566 cc_children_c(node, nesting + 1, score);
567 return;
568 }
569 "else_clause" => {
570 *score += 1;
571 }
572 "binary_expression" => {
573 if let Some(op) = node.child_by_field_name("operator")
574 && (op.kind() == "&&" || op.kind() == "||")
575 {
576 *score += 1;
577 }
578 }
579 "catch_clause" => {
580 *score += 1 + nesting;
581 cc_children_c(node, nesting + 1, score);
582 return;
583 }
584 _ => {}
585 }
586 cc_children_c(node, nesting, score);
587}
588
589fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
590 let mut cursor = node.walk();
591 for child in node.children(&mut cursor) {
592 cc_walk_c(child, nesting, score);
593 }
594}
595
596fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
597 let mut refs = Vec::new();
598 let mut cursor = body.walk();
599 visit_all(body, &mut cursor, &mut |n| {
600 if n.kind() == "field_expression"
601 && let Some(obj) = n.child(0)
602 && obj.kind() == "identifier"
603 {
604 let name = node_text(obj, src).to_string();
605 if !refs.contains(&name) {
606 refs.push(name);
607 }
608 }
609 });
610 refs
611}
612
613fn check_delegating_c(body: Node, src: &[u8]) -> bool {
614 let mut cursor = body.walk();
615 let stmts: Vec<Node> = body
616 .children(&mut cursor)
617 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
618 .collect();
619 if stmts.len() != 1 {
620 return false;
621 }
622 let stmt = stmts[0];
623 let call = match stmt.kind() {
624 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
625 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
626 _ => None,
627 };
628 call.and_then(|c| c.child(0))
629 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
630}
631
632fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
633 let mut refs = Vec::new();
634 let mut cursor = body.walk();
635 visit_all(body, &mut cursor, &mut |n| {
636 if n.kind() == "field_expression"
637 && let Some(field) = n.child_by_field_name("field")
638 {
639 let name = node_text(field, src).to_string();
640 if !refs.contains(&name) {
641 refs.push(name);
642 }
643 }
644 });
645 refs
646}
647
648fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
649 let mut fields = Vec::new();
650 let mut cursor = body.walk();
651 visit_all(body, &mut cursor, &mut |n| {
652 if n.kind() == "binary_expression" {
653 let text = node_text(n, src);
654 if (text.contains("NULL") || text.contains("nullptr"))
655 && let Some(left) = n.child(0)
656 {
657 let name = node_text(left, src).to_string();
658 if !fields.contains(&name) {
659 fields.push(name);
660 }
661 }
662 }
663 });
664 fields
665}
666
667fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
668 let mut cursor = body.walk();
669 let mut target = None;
670 visit_all(body, &mut cursor, &mut |n| {
671 if n.kind() == "switch_statement"
672 && target.is_none()
673 && let Some(cond) = n.child_by_field_name("condition")
674 {
675 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
676 }
677 });
678 target
679}
680
681fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
682 let mut calls = Vec::new();
683 let mut cursor = body.walk();
684 visit_all(body, &mut cursor, &mut |n| {
685 if n.kind() == "call_expression"
686 && let Some(func) = n.child(0)
687 {
688 let name = node_text(func, src).to_string();
689 if !calls.contains(&name) {
690 calls.push(name);
691 }
692 }
693 });
694 calls
695}
696
697fn count_comment_lines(node: Node, src: &[u8]) -> usize {
698 let mut count = 0;
699 let mut cursor = node.walk();
700 visit_all(node, &mut cursor, &mut |n| {
701 if n.kind() == "comment" {
702 count += node_text(n, src).lines().count();
703 }
704 });
705 count
706}
707
708fn hash_ast(node: Node) -> u64 {
709 let mut hasher = DefaultHasher::new();
710 hash_node(node, &mut hasher);
711 hasher.finish()
712}
713
714fn hash_node(node: Node, hasher: &mut DefaultHasher) {
715 node.kind().hash(hasher);
716 let mut cursor = node.walk();
717 for child in node.children(&mut cursor) {
718 hash_node(child, hasher);
719 }
720}
721
722fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
723 node.utf8_text(src).unwrap_or("")
724}
725
726fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
727 let mut comments = Vec::new();
728 let mut cursor = root.walk();
729 visit_all(root, &mut cursor, &mut |n| {
730 if n.kind().contains("comment") {
731 comments.push(cha_core::CommentInfo {
732 text: node_text(n, src).to_string(),
733 line: n.start_position().row + 1,
734 });
735 }
736 });
737 comments
738}
739
740fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
741 f(node);
742 if cursor.goto_first_child() {
743 loop {
744 let child_node = cursor.node();
745 let mut child_cursor = child_node.walk();
746 visit_all(child_node, &mut child_cursor, f);
747 if !cursor.goto_next_sibling() {
748 break;
749 }
750 }
751 cursor.goto_parent();
752 }
753}