1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless [`LanguageParser`] for C sources.
///
/// The type carries no data, so it is cheap to copy and can be created with
/// `CParser` or `CParser::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CParser;
/// Stateless [`LanguageParser`] for C++ sources.
///
/// The type carries no data, so it is cheap to copy and can be created with
/// `CppParser` or `CppParser::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CppParser;
11
12impl LanguageParser for CParser {
13 fn language_name(&self) -> &str {
14 "c"
15 }
16 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17 parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18 }
19}
20
21impl LanguageParser for CppParser {
22 fn language_name(&self) -> &str {
23 "cpp"
24 }
25 fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26 parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27 }
28}
29
30fn parse_c_like(
31 file: &SourceFile,
32 lang: &str,
33 language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35 let mut parser = Parser::new();
36 parser.set_language(language).ok()?;
37 let tree = parser.parse(&file.content, None)?;
38 let root = tree.root_node();
39 let src = file.content.as_bytes();
40
41 let mut functions = Vec::new();
42 let mut classes = Vec::new();
43 let mut imports = Vec::new();
44 let mut type_aliases = Vec::new();
45
46 collect_top_level(
47 root,
48 src,
49 &mut functions,
50 &mut classes,
51 &mut imports,
52 &mut type_aliases,
53 );
54
55 associate_methods(&functions, &mut classes);
59
60 if is_header_file(file) {
61 for f in &mut functions {
62 f.is_exported = true;
63 }
64 }
65
66 Some(SourceModel {
67 language: lang.into(),
68 total_lines: file.line_count(),
69 functions,
70 classes,
71 imports,
72 comments: collect_comments(root, src),
73 type_aliases,
74 })
75}
76
77fn is_header_file(file: &SourceFile) -> bool {
78 file.path
79 .extension()
80 .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
81}
82
83fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
86 for class in classes.iter_mut() {
87 let count = functions
88 .iter()
89 .filter(|f| {
90 f.parameter_types.first().is_some_and(|t| {
91 t.contains('*') && t.split('*').next().unwrap_or("").trim() == class.name
92 })
93 })
94 .count();
95 if count > 0 {
96 class.method_count += count;
97 class.has_behavior = true;
98 }
99 }
100}
101
102fn collect_top_level(
104 root: Node,
105 src: &[u8],
106 functions: &mut Vec<FunctionInfo>,
107 classes: &mut Vec<ClassInfo>,
108 imports: &mut Vec<ImportInfo>,
109 type_aliases: &mut Vec<(String, String)>,
110) {
111 let mut cursor = root.walk();
112 for child in root.children(&mut cursor) {
113 match child.kind() {
114 "function_definition" => {
115 if let Some(c) = try_extract_macro_class(child, src) {
119 classes.push(c);
120 } else if let Some(f) = extract_function(child, src) {
121 functions.push(f);
122 }
123 }
124 "struct_specifier" | "class_specifier" => {
125 if let Some(c) = extract_class(child, src) {
126 classes.push(c);
127 }
128 }
129 "type_definition" => {
130 extract_typedef_struct(child, src, classes, type_aliases);
131 }
132 "preproc_include" => {
133 if let Some(imp) = extract_include(child, src) {
134 imports.push(imp);
135 }
136 }
137 _ => {
138 if child.child_count() > 0 {
139 collect_top_level(child, src, functions, classes, imports, type_aliases);
140 }
141 }
142 }
143 }
144}
145
146fn extract_typedef_struct(
147 node: Node,
148 src: &[u8],
149 classes: &mut Vec<ClassInfo>,
150 type_aliases: &mut Vec<(String, String)>,
151) {
152 let mut inner = node.walk();
153 for sub in node.children(&mut inner) {
154 if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
155 continue;
156 }
157 let Some(mut c) = extract_class(sub, src) else {
158 continue;
159 };
160 let original_name = c.name.clone();
161 if c.name.is_empty()
162 && let Some(decl) = node.child_by_field_name("declarator")
163 {
164 c.name = node_text(decl, src).to_string();
165 }
166 if !original_name.is_empty()
169 && let Some(decl) = node.child_by_field_name("declarator")
170 {
171 let alias = node_text(decl, src).to_string();
172 if alias != original_name {
173 type_aliases.push((alias, original_name));
174 }
175 }
176 if !c.name.is_empty() {
177 classes.push(c);
178 }
179 }
180}
181
182fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
185 let mut has_class_spec = false;
186 let mut cursor = node.walk();
187 for child in node.children(&mut cursor) {
188 if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
189 has_class_spec = true;
190 }
191 }
192 if !has_class_spec {
193 return None;
194 }
195 let name_node = node
197 .child_by_field_name("declarator")
198 .filter(|d| d.kind() == "identifier")?;
199 let name = node_text(name_node, src).to_string();
200 let name_col = name_node.start_position().column;
201 let name_end_col = name_node.end_position().column;
202 let body = node.child_by_field_name("body")?;
203 let start_line = node.start_position().row + 1;
204 let end_line = node.end_position().row + 1;
205 let method_count = count_methods(body);
206 let (field_names, field_types, first_field_type) = extract_field_info(body, src);
207
208 let parent_name = first_field_type;
210
211 Some(ClassInfo {
212 name,
213 start_line,
214 end_line,
215 name_col,
216 name_end_col,
217 line_count: end_line - start_line + 1,
218 method_count,
219 is_exported: true,
220 delegating_method_count: 0,
221 field_count: field_names.len(),
222 field_names,
223 field_types,
224 has_behavior: method_count > 0,
225 is_interface: false,
226 parent_name,
227 override_count: 0,
228 self_call_count: 0,
229 has_listener_field: false,
230 has_notify_method: false,
231 })
232}
233
234fn extract_function(node: Node, src: &[u8]) -> Option<FunctionInfo> {
235 let declarator = node.child_by_field_name("declarator")?;
236 let name_node = find_func_name_node(declarator)?;
237 let name = node_text(name_node, src).to_string();
238 let name_col = name_node.start_position().column;
239 let name_end_col = name_node.end_position().column;
240 let start_line = node.start_position().row + 1;
241 let end_line = node.end_position().row + 1;
242 let body = node.child_by_field_name("body");
243 let (param_count, param_types) = extract_params(declarator, src);
244 let is_static = has_storage_class(node, src, "static");
245
246 Some(FunctionInfo {
247 name,
248 start_line,
249 end_line,
250 name_col,
251 name_end_col,
252 line_count: end_line - start_line + 1,
253 complexity: count_complexity(node),
254 body_hash: body.map(hash_ast),
255 is_exported: !is_static,
256 parameter_count: param_count,
257 parameter_types: param_types,
258 chain_depth: body.map(max_chain_depth).unwrap_or(0),
259 switch_arms: body.map(count_case_labels).unwrap_or(0),
260 external_refs: body
261 .map(|b| collect_external_refs_c(b, src))
262 .unwrap_or_default(),
263 is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
264 comment_lines: count_comment_lines(node, src),
265 referenced_fields: body
266 .map(|b| collect_field_refs_c(b, src))
267 .unwrap_or_default(),
268 null_check_fields: body
269 .map(|b| collect_null_checks_c(b, src))
270 .unwrap_or_default(),
271 switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
272 optional_param_count: 0,
273 called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
274 cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
275 })
276}
277
278fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
280 for i in 0..node.child_count() {
281 if let Some(child) = node.child(i)
282 && child.kind() == "storage_class_specifier"
283 && node_text(child, src) == keyword
284 {
285 return true;
286 }
287 }
288 false
289}
290
291fn find_func_name_node(declarator: Node) -> Option<Node> {
292 if declarator.kind() == "identifier" {
293 return Some(declarator);
294 }
295 declarator
296 .child_by_field_name("declarator")
297 .and_then(find_func_name_node)
298}
299
300fn extract_params(declarator: Node, src: &[u8]) -> (usize, Vec<String>) {
301 let params = match declarator.child_by_field_name("parameters") {
302 Some(p) => p,
303 None => return (0, vec![]),
304 };
305 let mut count = 0;
306 let mut types = Vec::new();
307 let mut cursor = params.walk();
308 for child in params.children(&mut cursor) {
309 if child.kind() == "parameter_declaration" {
310 count += 1;
311 let base = child
312 .child_by_field_name("type")
313 .map(|t| node_text(t, src).to_string())
314 .unwrap_or_else(|| "int".into());
315 let is_ptr = child
316 .child_by_field_name("declarator")
317 .is_some_and(|d| d.kind() == "pointer_declarator");
318 types.push(if is_ptr { format!("{base} *") } else { base });
319 }
320 }
321 (count, types)
322}
323
324fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
325 let name_node = node.child_by_field_name("name");
326 let name = name_node
327 .map(|n| node_text(n, src).to_string())
328 .unwrap_or_default();
329 let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
330 let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
331 let start_line = node.start_position().row + 1;
332 let end_line = node.end_position().row + 1;
333 let body = node.child_by_field_name("body");
334 let method_count = body.map(count_methods).unwrap_or(0);
335 let (field_names, field_types, first_field_type) =
336 body.map(|b| extract_field_info(b, src)).unwrap_or_default();
337
338 Some(ClassInfo {
339 name,
340 start_line,
341 end_line,
342 name_col,
343 name_end_col,
344 line_count: end_line - start_line + 1,
345 method_count,
346 is_exported: true,
347 delegating_method_count: 0,
348 field_count: field_names.len(),
349 field_names,
350 field_types,
351 has_behavior: method_count > 0,
352 is_interface: false,
353 parent_name: first_field_type,
356 override_count: 0,
357 self_call_count: 0,
358 has_listener_field: false,
359 has_notify_method: false,
360 })
361}
362
363fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
364 let mut names = Vec::new();
365 let mut types = Vec::new();
366 let mut first_type = None;
367 let mut cursor = body.walk();
368 for child in body.children(&mut cursor) {
369 if child.kind() == "field_declaration" {
370 if let Some(decl) = child.child_by_field_name("declarator") {
371 names.push(node_text(decl, src).to_string());
372 }
373 let ty = child
374 .child_by_field_name("type")
375 .map(|t| node_text(t, src).to_string());
376 if first_type.is_none() {
377 first_type = ty.clone();
378 }
379 types.push(ty.unwrap_or_default());
380 }
381 }
382 (names, types, first_type)
383}
384
385fn count_methods(body: Node) -> usize {
386 let mut count = 0;
387 let mut cursor = body.walk();
388 for child in body.children(&mut cursor) {
389 if child.kind() == "function_definition" || child.kind() == "declaration" {
390 count += 1;
391 }
392 }
393 count
394}
395
396fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
397 let path = node.child_by_field_name("path")?;
398 let text = node_text(path, src)
399 .trim_matches(|c| c == '"' || c == '<' || c == '>')
400 .to_string();
401 Some(ImportInfo {
402 source: text,
403 line: node.start_position().row + 1,
404 col: node.start_position().column,
405 ..Default::default()
406 })
407}
408
409fn count_complexity(node: Node) -> usize {
410 let mut c = 1usize;
411 let mut cursor = node.walk();
412 visit_all(node, &mut cursor, &mut |n| match n.kind() {
413 "if_statement"
414 | "for_statement"
415 | "while_statement"
416 | "do_statement"
417 | "case_statement"
418 | "catch_clause"
419 | "conditional_expression" => c += 1,
420 "binary_expression" => {
421 if let Some(op) = n.child_by_field_name("operator") {
422 let kind = op.kind();
423 if kind == "&&" || kind == "||" {
424 c += 1;
425 }
426 }
427 }
428 _ => {}
429 });
430 c
431}
432
433fn max_chain_depth(node: Node) -> usize {
434 let mut max = 0;
435 let mut cursor = node.walk();
436 visit_all(node, &mut cursor, &mut |n| {
437 if n.kind() == "field_expression" {
438 let d = chain_len(n);
439 if d > max {
440 max = d;
441 }
442 }
443 });
444 max
445}
446
447fn chain_len(node: Node) -> usize {
448 let mut depth = 0;
449 let mut current = node;
450 while current.kind() == "field_expression" || current.kind() == "call_expression" {
451 if current.kind() == "field_expression" {
452 depth += 1;
453 }
454 match current.child(0) {
455 Some(c) => current = c,
456 None => break,
457 }
458 }
459 depth
460}
461
462fn count_case_labels(node: Node) -> usize {
463 let mut count = 0;
464 let mut cursor = node.walk();
465 visit_all(node, &mut cursor, &mut |n| {
466 if n.kind() == "case_statement" {
467 count += 1;
468 }
469 });
470 count
471}
472
473fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
474 let mut score = 0;
475 cc_walk_c(node, 0, &mut score);
476 score
477}
478
479fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
480 match node.kind() {
481 "if_statement" => {
482 *score += 1 + nesting;
483 cc_children_c(node, nesting + 1, score);
484 return;
485 }
486 "for_statement" | "while_statement" | "do_statement" => {
487 *score += 1 + nesting;
488 cc_children_c(node, nesting + 1, score);
489 return;
490 }
491 "switch_statement" => {
492 *score += 1 + nesting;
493 cc_children_c(node, nesting + 1, score);
494 return;
495 }
496 "else_clause" => {
497 *score += 1;
498 }
499 "binary_expression" => {
500 if let Some(op) = node.child_by_field_name("operator")
501 && (op.kind() == "&&" || op.kind() == "||")
502 {
503 *score += 1;
504 }
505 }
506 "catch_clause" => {
507 *score += 1 + nesting;
508 cc_children_c(node, nesting + 1, score);
509 return;
510 }
511 _ => {}
512 }
513 cc_children_c(node, nesting, score);
514}
515
516fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
517 let mut cursor = node.walk();
518 for child in node.children(&mut cursor) {
519 cc_walk_c(child, nesting, score);
520 }
521}
522
523fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
524 let mut refs = Vec::new();
525 let mut cursor = body.walk();
526 visit_all(body, &mut cursor, &mut |n| {
527 if n.kind() == "field_expression"
528 && let Some(obj) = n.child(0)
529 && obj.kind() == "identifier"
530 {
531 let name = node_text(obj, src).to_string();
532 if !refs.contains(&name) {
533 refs.push(name);
534 }
535 }
536 });
537 refs
538}
539
540fn check_delegating_c(body: Node, src: &[u8]) -> bool {
541 let mut cursor = body.walk();
542 let stmts: Vec<Node> = body
543 .children(&mut cursor)
544 .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
545 .collect();
546 if stmts.len() != 1 {
547 return false;
548 }
549 let stmt = stmts[0];
550 let call = match stmt.kind() {
551 "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
552 "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
553 _ => None,
554 };
555 call.and_then(|c| c.child(0))
556 .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
557}
558
559fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
560 let mut refs = Vec::new();
561 let mut cursor = body.walk();
562 visit_all(body, &mut cursor, &mut |n| {
563 if n.kind() == "field_expression"
564 && let Some(field) = n.child_by_field_name("field")
565 {
566 let name = node_text(field, src).to_string();
567 if !refs.contains(&name) {
568 refs.push(name);
569 }
570 }
571 });
572 refs
573}
574
575fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
576 let mut fields = Vec::new();
577 let mut cursor = body.walk();
578 visit_all(body, &mut cursor, &mut |n| {
579 if n.kind() == "binary_expression" {
580 let text = node_text(n, src);
581 if (text.contains("NULL") || text.contains("nullptr"))
582 && let Some(left) = n.child(0)
583 {
584 let name = node_text(left, src).to_string();
585 if !fields.contains(&name) {
586 fields.push(name);
587 }
588 }
589 }
590 });
591 fields
592}
593
594fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
595 let mut cursor = body.walk();
596 let mut target = None;
597 visit_all(body, &mut cursor, &mut |n| {
598 if n.kind() == "switch_statement"
599 && target.is_none()
600 && let Some(cond) = n.child_by_field_name("condition")
601 {
602 target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
603 }
604 });
605 target
606}
607
608fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
609 let mut calls = Vec::new();
610 let mut cursor = body.walk();
611 visit_all(body, &mut cursor, &mut |n| {
612 if n.kind() == "call_expression"
613 && let Some(func) = n.child(0)
614 {
615 let name = node_text(func, src).to_string();
616 if !calls.contains(&name) {
617 calls.push(name);
618 }
619 }
620 });
621 calls
622}
623
624fn count_comment_lines(node: Node, src: &[u8]) -> usize {
625 let mut count = 0;
626 let mut cursor = node.walk();
627 visit_all(node, &mut cursor, &mut |n| {
628 if n.kind() == "comment" {
629 count += node_text(n, src).lines().count();
630 }
631 });
632 count
633}
634
635fn hash_ast(node: Node) -> u64 {
636 let mut hasher = DefaultHasher::new();
637 hash_node(node, &mut hasher);
638 hasher.finish()
639}
640
641fn hash_node(node: Node, hasher: &mut DefaultHasher) {
642 node.kind().hash(hasher);
643 let mut cursor = node.walk();
644 for child in node.children(&mut cursor) {
645 hash_node(child, hasher);
646 }
647}
648
649fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
650 node.utf8_text(src).unwrap_or("")
651}
652
653fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
654 let mut comments = Vec::new();
655 let mut cursor = root.walk();
656 visit_all(root, &mut cursor, &mut |n| {
657 if n.kind().contains("comment") {
658 comments.push(cha_core::CommentInfo {
659 text: node_text(n, src).to_string(),
660 line: n.start_position().row + 1,
661 });
662 }
663 });
664 comments
665}
666
667fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
668 f(node);
669 if cursor.goto_first_child() {
670 loop {
671 let child_node = cursor.node();
672 let mut child_cursor = child_node.walk();
673 visit_all(child_node, &mut child_cursor, f);
674 if !cursor.goto_next_sibling() {
675 break;
676 }
677 }
678 cursor.goto_parent();
679 }
680}