1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless marker type whose [`LanguageParser`] implementation parses C sources.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CParser;

/// Stateless marker type whose [`LanguageParser`] implementation parses C++ sources.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 collect_top_level(
47 root,
48 src,
49 &mut functions,
50 &mut classes,
51 &mut imports,
52 &mut type_aliases,
53 );
54
55 associate_methods(&functions, &mut classes);
59
60 if is_header_file(file) {
61 for f in &mut functions {
62 f.is_exported = true;
63 }
64 }
65
66 Some(SourceModel {
67 language: lang.into(),
68 total_lines: file.line_count(),
69 functions,
70 classes,
71 imports,
72 comments: collect_comments(root, src),
73 type_aliases,
74 })
75}
76
77fn is_header_file(file: &SourceFile) -> bool {
78 file.path
79 .extension()
80 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
81}
82
83fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
86 for class in classes.iter_mut() {
87 let count = functions
88 .iter()
89 .filter(|f| {
90 f.parameter_types.first().is_some_and(|t| {
91 t.contains('*') && t.split('*').next().unwrap_or("").trim() == class.name
92 })
93 })
94 .count();
95 if count > 0 {
96 class.method_count += count;
97 class.has_behavior = true;
98 }
99 }
100}
101
102fn collect_top_level(
104 root: Node,
105 src: &[u8],
106 functions: &mut Vec<FunctionInfo>,
107 classes: &mut Vec<ClassInfo>,
108 imports: &mut Vec<ImportInfo>,
109 type_aliases: &mut Vec<(String, String)>,
110) {
111 let mut cursor = root.walk();
112 for child in root.children(&mut cursor) {
113 match child.kind() {
114 "function_definition" => {
115 if let Some(c) = try_extract_macro_class(child, src) {
119 classes.push(c);
120 } else if let Some(f) = extract_function(child, src) {
121 functions.push(f);
122 }
123 }
124 "struct_specifier" | "class_specifier" => {
125 if let Some(c) = extract_class(child, src) {
126 classes.push(c);
127 }
128 }
129 "type_definition" => {
130 extract_typedef_struct(child, src, classes, type_aliases);
131 }
132 "preproc_include" => {
133 if let Some(imp) = extract_include(child, src) {
134 imports.push(imp);
135 }
136 }
137 _ => {
138 if child.child_count() > 0 {
139 collect_top_level(child, src, functions, classes, imports, type_aliases);
140 }
141 }
142 }
143 }
144}
145
146fn extract_typedef_struct(
147 node: Node,
148 src: &[u8],
149 classes: &mut Vec<ClassInfo>,
150 type_aliases: &mut Vec<(String, String)>,
151) {
152 let mut inner = node.walk();
153 for sub in node.children(&mut inner) {
154 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
155 continue;
156 }
157 let Some(mut c) = extract_class(sub, src) else {
158 continue;
159 };
160 let original_name = c.name.clone();
161 if c.name.is_empty()
162 && let Some(decl) = node.child_by_field_name("declarator")
163 {
164 c.name = node_text(decl, src).to_string();
165 }
166 if !original_name.is_empty()
169 && let Some(decl) = node.child_by_field_name("declarator")
170 {
171 let alias = node_text(decl, src).to_string();
172 if alias != original_name {
173 type_aliases.push((alias, original_name));
174 }
175 }
176 if !c.name.is_empty() {
177 classes.push(c);
178 }
179 }
180}
181
182fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
185 let mut has_class_spec = false;
186 let mut cursor = node.walk();
187 for child in node.children(&mut cursor) {
188 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
189 has_class_spec = true;
190 }
191 }
192 if !has_class_spec {
193 return None;
194 }
195 let name = node
197 .child_by_field_name("declarator")
198 .filter(|d| d.kind() == "identifier")
199 .map(|d| node_text(d, src).to_string())?;
200 let body = node.child_by_field_name("body")?;
201 let start_line = node.start_position().row + 1;
202 let end_line = node.end_position().row + 1;
203 let method_count = count_methods(body);
204 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
205
206 let parent_name = first_field_type;
208
209 Some(ClassInfo {
210 name,
211 start_line,
212 end_line,
213 line_count: end_line - start_line + 1,
214 method_count,
215 is_exported: true,
216 delegating_method_count: 0,
217 field_count: field_names.len(),
218 field_names,
219 field_types,
220 has_behavior: method_count > 0,
221 is_interface: false,
222 parent_name,
223 override_count: 0,
224 self_call_count: 0,
225 has_listener_field: false,
226 has_notify_method: false,
227 })
228}
229
230fn extract_function(node: Node, src: &[u8]) -> Option<FunctionInfo> {
231 let declarator = node.child_by_field_name("declarator")?;
232 let name = find_func_name(declarator, src)?.to_string();
233 let start_line = node.start_position().row + 1;
234 let end_line = node.end_position().row + 1;
235 let body = node.child_by_field_name("body");
236 let (param_count, param_types) = extract_params(declarator, src);
237 let is_static = has_storage_class(node, src, "static");
238
239 Some(FunctionInfo {
240 name,
241 start_line,
242 end_line,
243 line_count: end_line - start_line + 1,
244 complexity: count_complexity(node),
245 body_hash: body.map(hash_ast),
246 is_exported: !is_static,
247 parameter_count: param_count,
248 parameter_types: param_types,
249 chain_depth: body.map(max_chain_depth).unwrap_or(0),
250 switch_arms: body.map(count_case_labels).unwrap_or(0),
251 external_refs: body
252 .map(|b| collect_external_refs_c(b, src))
253 .unwrap_or_default(),
254 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
255 comment_lines: count_comment_lines(node, src),
256 referenced_fields: body
257 .map(|b| collect_field_refs_c(b, src))
258 .unwrap_or_default(),
259 null_check_fields: body
260 .map(|b| collect_null_checks_c(b, src))
261 .unwrap_or_default(),
262 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
263 optional_param_count: 0,
264 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
265 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
266 })
267}
268
269fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
271 for i in 0..node.child_count() {
272 if let Some(child) = node.child(i)
273 && child.kind() == "storage_class_specifier"
274 && node_text(child, src) == keyword
275 {
276 return true;
277 }
278 }
279 false
280}
281
282fn find_func_name<'a>(declarator: Node<'a>, src: &'a [u8]) -> Option<&'a str> {
283 if declarator.kind() == "identifier" {
285 return Some(node_text(declarator, src));
286 }
287 declarator
288 .child_by_field_name("declarator")
289 .and_then(|d| find_func_name(d, src))
290}
291
292fn extract_params(declarator: Node, src: &[u8]) -> (usize, Vec<String>) {
293 let params = match declarator.child_by_field_name("parameters") {
294 Some(p) => p,
295 None => return (0, vec![]),
296 };
297 let mut count = 0;
298 let mut types = Vec::new();
299 let mut cursor = params.walk();
300 for child in params.children(&mut cursor) {
301 if child.kind() == "parameter_declaration" {
302 count += 1;
303 let base = child
304 .child_by_field_name("type")
305 .map(|t| node_text(t, src).to_string())
306 .unwrap_or_else(|| "int".into());
307 let is_ptr = child
308 .child_by_field_name("declarator")
309 .is_some_and(|d| d.kind() == "pointer_declarator");
310 types.push(if is_ptr { format!("{base} *") } else { base });
311 }
312 }
313 (count, types)
314}
315
316fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
317 let name = node
318 .child_by_field_name("name")
319 .map(|n| node_text(n, src).to_string())
320 .unwrap_or_default();
321 let start_line = node.start_position().row + 1;
322 let end_line = node.end_position().row + 1;
323 let body = node.child_by_field_name("body");
324 let method_count = body.map(count_methods).unwrap_or(0);
325 let (field_names, field_types, first_field_type) =
326 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
327
328 Some(ClassInfo {
329 name,
330 start_line,
331 end_line,
332 line_count: end_line - start_line + 1,
333 method_count,
334 is_exported: true,
335 delegating_method_count: 0,
336 field_count: field_names.len(),
337 field_names,
338 field_types,
339 has_behavior: method_count > 0,
340 is_interface: false,
341 parent_name: first_field_type,
344 override_count: 0,
345 self_call_count: 0,
346 has_listener_field: false,
347 has_notify_method: false,
348 })
349}
350
351fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
352 let mut names = Vec::new();
353 let mut types = Vec::new();
354 let mut first_type = None;
355 let mut cursor = body.walk();
356 for child in body.children(&mut cursor) {
357 if child.kind() == "field_declaration" {
358 if let Some(decl) = child.child_by_field_name("declarator") {
359 names.push(node_text(decl, src).to_string());
360 }
361 let ty = child
362 .child_by_field_name("type")
363 .map(|t| node_text(t, src).to_string());
364 if first_type.is_none() {
365 first_type = ty.clone();
366 }
367 types.push(ty.unwrap_or_default());
368 }
369 }
370 (names, types, first_type)
371}
372
373fn count_methods(body: Node) -> usize {
374 let mut count = 0;
375 let mut cursor = body.walk();
376 for child in body.children(&mut cursor) {
377 if child.kind() == "function_definition" || child.kind() == "declaration" {
378 count += 1;
379 }
380 }
381 count
382}
383
384fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
385 let path = node.child_by_field_name("path")?;
386 let text = node_text(path, src)
387 .trim_matches(|c| c == '"' || c == '<' || c == '>')
388 .to_string();
389 Some(ImportInfo {
390 source: text,
391 line: node.start_position().row + 1,
392 })
393}
394
395fn count_complexity(node: Node) -> usize {
396 let mut c = 1usize;
397 let mut cursor = node.walk();
398 visit_all(node, &mut cursor, &mut |n| match n.kind() {
399 "if_statement"
400 | "for_statement"
401 | "while_statement"
402 | "do_statement"
403 | "case_statement"
404 | "catch_clause"
405 | "conditional_expression" => c += 1,
406 "binary_expression" => {
407 if let Some(op) = n.child_by_field_name("operator") {
408 let kind = op.kind();
409 if kind == "&&" || kind == "||" {
410 c += 1;
411 }
412 }
413 }
414 _ => {}
415 });
416 c
417}
418
419fn max_chain_depth(node: Node) -> usize {
420 let mut max = 0;
421 let mut cursor = node.walk();
422 visit_all(node, &mut cursor, &mut |n| {
423 if n.kind() == "field_expression" {
424 let d = chain_len(n);
425 if d > max {
426 max = d;
427 }
428 }
429 });
430 max
431}
432
433fn chain_len(node: Node) -> usize {
434 let mut depth = 0;
435 let mut current = node;
436 while current.kind() == "field_expression" || current.kind() == "call_expression" {
437 if current.kind() == "field_expression" {
438 depth += 1;
439 }
440 match current.child(0) {
441 Some(c) => current = c,
442 None => break,
443 }
444 }
445 depth
446}
447
448fn count_case_labels(node: Node) -> usize {
449 let mut count = 0;
450 let mut cursor = node.walk();
451 visit_all(node, &mut cursor, &mut |n| {
452 if n.kind() == "case_statement" {
453 count += 1;
454 }
455 });
456 count
457}
458
459fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
460 let mut score = 0;
461 cc_walk_c(node, 0, &mut score);
462 score
463}
464
465fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
466 match node.kind() {
467 "if_statement" => {
468 *score += 1 + nesting;
469 cc_children_c(node, nesting + 1, score);
470 return;
471 }
472 "for_statement" | "while_statement" | "do_statement" => {
473 *score += 1 + nesting;
474 cc_children_c(node, nesting + 1, score);
475 return;
476 }
477 "switch_statement" => {
478 *score += 1 + nesting;
479 cc_children_c(node, nesting + 1, score);
480 return;
481 }
482 "else_clause" => {
483 *score += 1;
484 }
485 "binary_expression" => {
486 if let Some(op) = node.child_by_field_name("operator")
487 && (op.kind() == "&&" || op.kind() == "||")
488 {
489 *score += 1;
490 }
491 }
492 "catch_clause" => {
493 *score += 1 + nesting;
494 cc_children_c(node, nesting + 1, score);
495 return;
496 }
497 _ => {}
498 }
499 cc_children_c(node, nesting, score);
500}
501
502fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
503 let mut cursor = node.walk();
504 for child in node.children(&mut cursor) {
505 cc_walk_c(child, nesting, score);
506 }
507}
508
509fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
510 let mut refs = Vec::new();
511 let mut cursor = body.walk();
512 visit_all(body, &mut cursor, &mut |n| {
513 if n.kind() == "field_expression"
514 && let Some(obj) = n.child(0)
515 && obj.kind() == "identifier"
516 {
517 let name = node_text(obj, src).to_string();
518 if !refs.contains(&name) {
519 refs.push(name);
520 }
521 }
522 });
523 refs
524}
525
526fn check_delegating_c(body: Node, src: &[u8]) -> bool {
527 let mut cursor = body.walk();
528 let stmts: Vec<Node> = body
529 .children(&mut cursor)
530 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
531 .collect();
532 if stmts.len() != 1 {
533 return false;
534 }
535 let stmt = stmts[0];
536 let call = match stmt.kind() {
537 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
538 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
539 _ => None,
540 };
541 call.and_then(|c| c.child(0))
542 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
543}
544
545fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
546 let mut refs = Vec::new();
547 let mut cursor = body.walk();
548 visit_all(body, &mut cursor, &mut |n| {
549 if n.kind() == "field_expression"
550 && let Some(field) = n.child_by_field_name("field")
551 {
552 let name = node_text(field, src).to_string();
553 if !refs.contains(&name) {
554 refs.push(name);
555 }
556 }
557 });
558 refs
559}
560
561fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
562 let mut fields = Vec::new();
563 let mut cursor = body.walk();
564 visit_all(body, &mut cursor, &mut |n| {
565 if n.kind() == "binary_expression" {
566 let text = node_text(n, src);
567 if (text.contains("NULL") || text.contains("nullptr"))
568 && let Some(left) = n.child(0)
569 {
570 let name = node_text(left, src).to_string();
571 if !fields.contains(&name) {
572 fields.push(name);
573 }
574 }
575 }
576 });
577 fields
578}
579
580fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
581 let mut cursor = body.walk();
582 let mut target = None;
583 visit_all(body, &mut cursor, &mut |n| {
584 if n.kind() == "switch_statement"
585 && target.is_none()
586 && let Some(cond) = n.child_by_field_name("condition")
587 {
588 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
589 }
590 });
591 target
592}
593
594fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
595 let mut calls = Vec::new();
596 let mut cursor = body.walk();
597 visit_all(body, &mut cursor, &mut |n| {
598 if n.kind() == "call_expression"
599 && let Some(func) = n.child(0)
600 {
601 let name = node_text(func, src).to_string();
602 if !calls.contains(&name) {
603 calls.push(name);
604 }
605 }
606 });
607 calls
608}
609
610fn count_comment_lines(node: Node, src: &[u8]) -> usize {
611 let mut count = 0;
612 let mut cursor = node.walk();
613 visit_all(node, &mut cursor, &mut |n| {
614 if n.kind() == "comment" {
615 count += node_text(n, src).lines().count();
616 }
617 });
618 count
619}
620
621fn hash_ast(node: Node) -> u64 {
622 let mut hasher = DefaultHasher::new();
623 hash_node(node, &mut hasher);
624 hasher.finish()
625}
626
627fn hash_node(node: Node, hasher: &mut DefaultHasher) {
628 node.kind().hash(hasher);
629 let mut cursor = node.walk();
630 for child in node.children(&mut cursor) {
631 hash_node(child, hasher);
632 }
633}
634
635fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
636 node.utf8_text(src).unwrap_or("")
637}
638
639fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
640 let mut comments = Vec::new();
641 let mut cursor = root.walk();
642 visit_all(root, &mut cursor, &mut |n| {
643 if n.kind().contains("comment") {
644 comments.push(cha_core::CommentInfo {
645 text: node_text(n, src).to_string(),
646 line: n.start_position().row + 1,
647 });
648 }
649 });
650 comments
651}
652
653fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
654 f(node);
655 if cursor.goto_first_child() {
656 loop {
657 let child_node = cursor.node();
658 let mut child_cursor = child_node.walk();
659 visit_all(child_node, &mut child_cursor, f);
660 if !cursor.goto_next_sibling() {
661 break;
662 }
663 }
664 cursor.goto_parent();
665 }
666}