Skip to main content

cha_parser/
c_lang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless parser for C sources; its [`LanguageParser`] impl reports the
/// language name `"c"` and parses with the tree-sitter C grammar.
pub struct CParser;
/// Stateless parser for C++ sources; its [`LanguageParser`] impl reports the
/// language name `"cpp"` and parses with the tree-sitter C++ grammar.
pub struct CppParser;
11
12impl LanguageParser for CParser {
13    fn language_name(&self) -> &str {
14        "c"
15    }
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18    }
19}
20
21impl LanguageParser for CppParser {
22    fn language_name(&self) -> &str {
23        "cpp"
24    }
25    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26        parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27    }
28}
29
30fn parse_c_like(
31    file: &SourceFile,
32    lang: &str,
33    language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35    let mut parser = Parser::new();
36    parser.set_language(language).ok()?;
37    let tree = parser.parse(&file.content, None)?;
38    let root = tree.root_node();
39    let src = file.content.as_bytes();
40
41    let mut functions = Vec::new();
42    let mut classes = Vec::new();
43    let mut imports = Vec::new();
44    let mut type_aliases = Vec::new();
45
46    let imports_map = crate::c_imports::build(root, src);
47    collect_top_level(
48        root,
49        src,
50        &imports_map,
51        &mut functions,
52        &mut classes,
53        &mut imports,
54        &mut type_aliases,
55    );
56
57    // C OOP heuristic: if a function's first parameter is a pointer to a known
58    // struct type, count it as a method of that struct. This is the universal
59    // C OOP convention used by GLib/GObject, Linux kernel, lvgl, FreeRTOS, etc.
60    associate_methods(&functions, &mut classes);
61
62    if is_header_file(file) {
63        for f in &mut functions {
64            f.is_exported = true;
65        }
66    }
67
68    Some(SourceModel {
69        language: lang.into(),
70        total_lines: file.line_count(),
71        functions,
72        classes,
73        imports,
74        comments: collect_comments(root, src),
75        type_aliases,
76    })
77}
78
79fn is_header_file(file: &SourceFile) -> bool {
80    file.path
81        .extension()
82        .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
83}
84
85/// C OOP heuristic: associate free functions with structs (same-file).
86/// A function is a "method" if its first parameter is a pointer to that struct.
87fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
88    for class in classes.iter_mut() {
89        let count = functions
90            .iter()
91            .filter(|f| {
92                f.parameter_types.first().is_some_and(|t| {
93                    t.raw.contains('*')
94                        && t.raw.split('*').next().unwrap_or("").trim() == class.name
95                })
96            })
97            .count();
98        if count > 0 {
99            class.method_count += count;
100            class.has_behavior = true;
101        }
102    }
103}
104
105// cha:ignore cognitive_complexity
106fn collect_top_level(
107    root: Node,
108    src: &[u8],
109    imports_map: &crate::type_ref::ImportsMap,
110    functions: &mut Vec<FunctionInfo>,
111    classes: &mut Vec<ClassInfo>,
112    imports: &mut Vec<ImportInfo>,
113    type_aliases: &mut Vec<(String, String)>,
114) {
115    let mut cursor = root.walk();
116    for child in root.children(&mut cursor) {
117        match child.kind() {
118            "function_definition" => {
119                // Heuristic: `class MACRO Name { ... };` is parsed by tree-sitter
120                // as a function_definition whose return type is a class_specifier.
121                // Detect this and extract as a class instead.
122                if let Some(c) = try_extract_macro_class(child, src) {
123                    classes.push(c);
124                } else if let Some(f) = extract_function(child, src, imports_map) {
125                    functions.push(f);
126                }
127            }
128            "struct_specifier" | "class_specifier" => {
129                if let Some(c) = extract_class(child, src) {
130                    classes.push(c);
131                }
132            }
133            "type_definition" => {
134                extract_typedef_struct(child, src, classes, type_aliases);
135            }
136            "preproc_include" => {
137                if let Some(imp) = extract_include(child, src) {
138                    imports.push(imp);
139                }
140            }
141            _ => {
142                if child.child_count() > 0 {
143                    collect_top_level(
144                        child,
145                        src,
146                        imports_map,
147                        functions,
148                        classes,
149                        imports,
150                        type_aliases,
151                    );
152                }
153            }
154        }
155    }
156}
157
158fn extract_typedef_struct(
159    node: Node,
160    src: &[u8],
161    classes: &mut Vec<ClassInfo>,
162    type_aliases: &mut Vec<(String, String)>,
163) {
164    let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
165    if !found_struct {
166        register_simple_typedef(node, src, type_aliases);
167    }
168}
169
170fn register_typedef_struct_children(
171    node: Node,
172    src: &[u8],
173    classes: &mut Vec<ClassInfo>,
174    type_aliases: &mut Vec<(String, String)>,
175) -> bool {
176    let mut found_struct = false;
177    let mut inner = node.walk();
178    for sub in node.children(&mut inner) {
179        if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
180            continue;
181        }
182        found_struct = true;
183        register_single_typedef_struct(node, sub, src, classes, type_aliases);
184    }
185    found_struct
186}
187
188fn register_single_typedef_struct(
189    typedef: Node,
190    sub: Node,
191    src: &[u8],
192    classes: &mut Vec<ClassInfo>,
193    type_aliases: &mut Vec<(String, String)>,
194) {
195    let Some(mut c) = extract_class(sub, src) else {
196        return;
197    };
198    let original_name = c.name.clone();
199    if c.name.is_empty()
200        && let Some(decl) = typedef.child_by_field_name("declarator")
201    {
202        c.name = node_text(decl, src).to_string();
203    }
204    if !original_name.is_empty()
205        && let Some(decl) = typedef.child_by_field_name("declarator")
206    {
207        let alias = node_text(decl, src).to_string();
208        if alias != original_name {
209            type_aliases.push((alias, original_name));
210        }
211    }
212    if !c.name.is_empty() {
213        classes.push(c);
214    }
215}
216
217/// `typedef uint32_t lv_part_t;` — simple alias, no struct body.
218fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
219    let alias = extract_typedef_alias(node, src);
220    let original = node
221        .child_by_field_name("type")
222        .map(|t| node_text(t, src).trim().to_string())
223        .unwrap_or_default();
224    if !alias.is_empty() && alias != original {
225        type_aliases.push((alias, original));
226    }
227}
228
229/// Find the new type name in a `typedef <something> <name>;`. Tree-sitter-c
230/// sometimes puts the name behind the `declarator` field; other grammars
231/// (typedef of enum/union without body) emit a plain `type_identifier` as
232/// a top-level child. Try the field first, then the first type_identifier.
233fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
234    if let Some(decl) = node.child_by_field_name("declarator") {
235        return node_text(decl, src).trim().to_string();
236    }
237    let mut cursor = node.walk();
238    for child in node.children(&mut cursor) {
239        if child.kind() == "type_identifier" {
240            return node_text(child, src).trim().to_string();
241        }
242    }
243    String::new()
244}
245
246/// Detect `class MACRO ClassName { ... };` misparse.
247/// tree-sitter sees this as function_definition with class_specifier return type.
248fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
249    let mut has_class_spec = false;
250    let mut cursor = node.walk();
251    for child in node.children(&mut cursor) {
252        if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
253            has_class_spec = true;
254        }
255    }
256    if !has_class_spec {
257        return None;
258    }
259    // The real class name is the "identifier" child (what tree-sitter thinks is the func name)
260    let name_node = node
261        .child_by_field_name("declarator")
262        .filter(|d| d.kind() == "identifier")?;
263    let name = node_text(name_node, src).to_string();
264    let name_col = name_node.start_position().column;
265    let name_end_col = name_node.end_position().column;
266    let body = node.child_by_field_name("body")?;
267    let start_line = node.start_position().row + 1;
268    let end_line = node.end_position().row + 1;
269    let method_count = count_methods(body);
270    let (field_names, field_types, first_field_type) = extract_field_info(body, src);
271
272    // Find parent from the class_specifier's base_class_clause if present
273    let parent_name = first_field_type;
274
275    Some(ClassInfo {
276        name,
277        start_line,
278        end_line,
279        name_col,
280        name_end_col,
281        line_count: end_line - start_line + 1,
282        method_count,
283        is_exported: true,
284        delegating_method_count: 0,
285        field_count: field_names.len(),
286        field_names,
287        field_types,
288        has_behavior: method_count > 0,
289        is_interface: false,
290        parent_name,
291        override_count: 0,
292        self_call_count: 0,
293        has_listener_field: false,
294        has_notify_method: false,
295    })
296}
297
298fn extract_function(
299    node: Node,
300    src: &[u8],
301    imports_map: &crate::type_ref::ImportsMap,
302) -> Option<FunctionInfo> {
303    let declarator = node.child_by_field_name("declarator")?;
304    let name_node = find_func_name_node(declarator)?;
305    let name = node_text(name_node, src).to_string();
306    let name_col = name_node.start_position().column;
307    let name_end_col = name_node.end_position().column;
308    let start_line = node.start_position().row + 1;
309    let end_line = node.end_position().row + 1;
310    let body = node.child_by_field_name("body");
311    let (param_count, param_types) = extract_params(declarator, src, imports_map);
312    let is_static = has_storage_class(node, src, "static");
313
314    Some(FunctionInfo {
315        name,
316        start_line,
317        end_line,
318        name_col,
319        name_end_col,
320        line_count: end_line - start_line + 1,
321        complexity: count_complexity(node),
322        body_hash: body.map(hash_ast),
323        is_exported: !is_static,
324        parameter_count: param_count,
325        parameter_types: param_types,
326        chain_depth: body.map(max_chain_depth).unwrap_or(0),
327        switch_arms: body.map(count_case_labels).unwrap_or(0),
328        external_refs: body
329            .map(|b| collect_external_refs_c(b, src))
330            .unwrap_or_default(),
331        is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
332        comment_lines: count_comment_lines(node, src),
333        referenced_fields: body
334            .map(|b| collect_field_refs_c(b, src))
335            .unwrap_or_default(),
336        null_check_fields: body
337            .map(|b| collect_null_checks_c(b, src))
338            .unwrap_or_default(),
339        switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
340        optional_param_count: 0,
341        called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
342        cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
343    })
344}
345
346/// Check if a declaration node has a specific storage class specifier (e.g. "static").
347fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
348    for i in 0..node.child_count() {
349        if let Some(child) = node.child(i)
350            && child.kind() == "storage_class_specifier"
351            && node_text(child, src) == keyword
352        {
353            return true;
354        }
355    }
356    false
357}
358
359fn find_func_name_node(declarator: Node) -> Option<Node> {
360    if declarator.kind() == "identifier" {
361        return Some(declarator);
362    }
363    declarator
364        .child_by_field_name("declarator")
365        .and_then(find_func_name_node)
366}
367
368fn extract_params(
369    declarator: Node,
370    src: &[u8],
371    imports_map: &crate::type_ref::ImportsMap,
372) -> (usize, Vec<cha_core::TypeRef>) {
373    let params = match declarator.child_by_field_name("parameters") {
374        Some(p) => p,
375        None => return (0, vec![]),
376    };
377    let mut count = 0;
378    let mut types = Vec::new();
379    let mut cursor = params.walk();
380    for child in params.children(&mut cursor) {
381        if child.kind() == "parameter_declaration" {
382            count += 1;
383            let base = child
384                .child_by_field_name("type")
385                .map(|t| node_text(t, src).to_string())
386                .unwrap_or_else(|| "int".into());
387            let is_ptr = child
388                .child_by_field_name("declarator")
389                .is_some_and(|d| d.kind() == "pointer_declarator");
390            let raw = if is_ptr { format!("{base} *") } else { base };
391            types.push(crate::type_ref::resolve(raw, imports_map));
392        }
393    }
394    (count, types)
395}
396
397fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
398    let name_node = node.child_by_field_name("name");
399    let name = name_node
400        .map(|n| node_text(n, src).to_string())
401        .unwrap_or_default();
402    let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
403    let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
404    let start_line = node.start_position().row + 1;
405    let end_line = node.end_position().row + 1;
406    let body = node.child_by_field_name("body");
407    let method_count = body.map(count_methods).unwrap_or(0);
408    let (field_names, field_types, first_field_type) =
409        body.map(|b| extract_field_info(b, src)).unwrap_or_default();
410
411    Some(ClassInfo {
412        name,
413        start_line,
414        end_line,
415        name_col,
416        name_end_col,
417        line_count: end_line - start_line + 1,
418        method_count,
419        is_exported: true,
420        delegating_method_count: 0,
421        field_count: field_names.len(),
422        field_names,
423        field_types,
424        has_behavior: method_count > 0,
425        is_interface: false,
426        // First field type stored as parent candidate;
427        // build_class_graph validates against known struct names.
428        parent_name: first_field_type,
429        override_count: 0,
430        self_call_count: 0,
431        has_listener_field: false,
432        has_notify_method: false,
433    })
434}
435
436fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
437    let mut names = Vec::new();
438    let mut types = Vec::new();
439    let mut first_type = None;
440    let mut cursor = body.walk();
441    for child in body.children(&mut cursor) {
442        if child.kind() == "field_declaration" {
443            if let Some(decl) = child.child_by_field_name("declarator") {
444                names.push(node_text(decl, src).to_string());
445            }
446            let ty = child
447                .child_by_field_name("type")
448                .map(|t| node_text(t, src).to_string());
449            if first_type.is_none() {
450                first_type = ty.clone();
451            }
452            types.push(ty.unwrap_or_default());
453        }
454    }
455    (names, types, first_type)
456}
457
458fn count_methods(body: Node) -> usize {
459    let mut count = 0;
460    let mut cursor = body.walk();
461    for child in body.children(&mut cursor) {
462        if child.kind() == "function_definition" || child.kind() == "declaration" {
463            count += 1;
464        }
465    }
466    count
467}
468
469fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
470    let path = node.child_by_field_name("path")?;
471    let text = node_text(path, src)
472        .trim_matches(|c| c == '"' || c == '<' || c == '>')
473        .to_string();
474    Some(ImportInfo {
475        source: text,
476        line: node.start_position().row + 1,
477        col: node.start_position().column,
478        ..Default::default()
479    })
480}
481
482fn count_complexity(node: Node) -> usize {
483    let mut c = 1usize;
484    let mut cursor = node.walk();
485    visit_all(node, &mut cursor, &mut |n| match n.kind() {
486        "if_statement"
487        | "for_statement"
488        | "while_statement"
489        | "do_statement"
490        | "case_statement"
491        | "catch_clause"
492        | "conditional_expression" => c += 1,
493        "binary_expression" => {
494            if let Some(op) = n.child_by_field_name("operator") {
495                let kind = op.kind();
496                if kind == "&&" || kind == "||" {
497                    c += 1;
498                }
499            }
500        }
501        _ => {}
502    });
503    c
504}
505
506fn max_chain_depth(node: Node) -> usize {
507    let mut max = 0;
508    let mut cursor = node.walk();
509    visit_all(node, &mut cursor, &mut |n| {
510        if n.kind() == "field_expression" {
511            let d = chain_len(n);
512            if d > max {
513                max = d;
514            }
515        }
516    });
517    max
518}
519
520fn chain_len(node: Node) -> usize {
521    let mut depth = 0;
522    let mut current = node;
523    while current.kind() == "field_expression" || current.kind() == "call_expression" {
524        if current.kind() == "field_expression" {
525            depth += 1;
526        }
527        match current.child(0) {
528            Some(c) => current = c,
529            None => break,
530        }
531    }
532    depth
533}
534
535fn count_case_labels(node: Node) -> usize {
536    let mut count = 0;
537    let mut cursor = node.walk();
538    visit_all(node, &mut cursor, &mut |n| {
539        if n.kind() == "case_statement" {
540            count += 1;
541        }
542    });
543    count
544}
545
546fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
547    let mut score = 0;
548    cc_walk_c(node, 0, &mut score);
549    score
550}
551
552fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
553    match node.kind() {
554        "if_statement" => {
555            *score += 1 + nesting;
556            cc_children_c(node, nesting + 1, score);
557            return;
558        }
559        "for_statement" | "while_statement" | "do_statement" => {
560            *score += 1 + nesting;
561            cc_children_c(node, nesting + 1, score);
562            return;
563        }
564        "switch_statement" => {
565            *score += 1 + nesting;
566            cc_children_c(node, nesting + 1, score);
567            return;
568        }
569        "else_clause" => {
570            *score += 1;
571        }
572        "binary_expression" => {
573            if let Some(op) = node.child_by_field_name("operator")
574                && (op.kind() == "&&" || op.kind() == "||")
575            {
576                *score += 1;
577            }
578        }
579        "catch_clause" => {
580            *score += 1 + nesting;
581            cc_children_c(node, nesting + 1, score);
582            return;
583        }
584        _ => {}
585    }
586    cc_children_c(node, nesting, score);
587}
588
589fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
590    let mut cursor = node.walk();
591    for child in node.children(&mut cursor) {
592        cc_walk_c(child, nesting, score);
593    }
594}
595
596fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
597    let mut refs = Vec::new();
598    let mut cursor = body.walk();
599    visit_all(body, &mut cursor, &mut |n| {
600        if n.kind() == "field_expression"
601            && let Some(obj) = n.child(0)
602            && obj.kind() == "identifier"
603        {
604            let name = node_text(obj, src).to_string();
605            if !refs.contains(&name) {
606                refs.push(name);
607            }
608        }
609    });
610    refs
611}
612
613fn check_delegating_c(body: Node, src: &[u8]) -> bool {
614    let mut cursor = body.walk();
615    let stmts: Vec<Node> = body
616        .children(&mut cursor)
617        .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
618        .collect();
619    if stmts.len() != 1 {
620        return false;
621    }
622    let stmt = stmts[0];
623    let call = match stmt.kind() {
624        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
625        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
626        _ => None,
627    };
628    call.and_then(|c| c.child(0))
629        .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
630}
631
632fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
633    let mut refs = Vec::new();
634    let mut cursor = body.walk();
635    visit_all(body, &mut cursor, &mut |n| {
636        if n.kind() == "field_expression"
637            && let Some(field) = n.child_by_field_name("field")
638        {
639            let name = node_text(field, src).to_string();
640            if !refs.contains(&name) {
641                refs.push(name);
642            }
643        }
644    });
645    refs
646}
647
648fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
649    let mut fields = Vec::new();
650    let mut cursor = body.walk();
651    visit_all(body, &mut cursor, &mut |n| {
652        if n.kind() == "binary_expression" {
653            let text = node_text(n, src);
654            if (text.contains("NULL") || text.contains("nullptr"))
655                && let Some(left) = n.child(0)
656            {
657                let name = node_text(left, src).to_string();
658                if !fields.contains(&name) {
659                    fields.push(name);
660                }
661            }
662        }
663    });
664    fields
665}
666
667fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
668    let mut cursor = body.walk();
669    let mut target = None;
670    visit_all(body, &mut cursor, &mut |n| {
671        if n.kind() == "switch_statement"
672            && target.is_none()
673            && let Some(cond) = n.child_by_field_name("condition")
674        {
675            target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
676        }
677    });
678    target
679}
680
681fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
682    let mut calls = Vec::new();
683    let mut cursor = body.walk();
684    visit_all(body, &mut cursor, &mut |n| {
685        if n.kind() == "call_expression"
686            && let Some(func) = n.child(0)
687        {
688            let name = node_text(func, src).to_string();
689            if !calls.contains(&name) {
690                calls.push(name);
691            }
692        }
693    });
694    calls
695}
696
697fn count_comment_lines(node: Node, src: &[u8]) -> usize {
698    let mut count = 0;
699    let mut cursor = node.walk();
700    visit_all(node, &mut cursor, &mut |n| {
701        if n.kind() == "comment" {
702            count += node_text(n, src).lines().count();
703        }
704    });
705    count
706}
707
708fn hash_ast(node: Node) -> u64 {
709    let mut hasher = DefaultHasher::new();
710    hash_node(node, &mut hasher);
711    hasher.finish()
712}
713
714fn hash_node(node: Node, hasher: &mut DefaultHasher) {
715    node.kind().hash(hasher);
716    let mut cursor = node.walk();
717    for child in node.children(&mut cursor) {
718        hash_node(child, hasher);
719    }
720}
721
722fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
723    node.utf8_text(src).unwrap_or("")
724}
725
726fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
727    let mut comments = Vec::new();
728    let mut cursor = root.walk();
729    visit_all(root, &mut cursor, &mut |n| {
730        if n.kind().contains("comment") {
731            comments.push(cha_core::CommentInfo {
732                text: node_text(n, src).to_string(),
733                line: n.start_position().row + 1,
734            });
735        }
736    });
737    comments
738}
739
740fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
741    f(node);
742    if cursor.goto_first_child() {
743        loop {
744            let child_node = cursor.node();
745            let mut child_cursor = child_node.walk();
746            visit_all(child_node, &mut child_cursor, f);
747            if !cursor.goto_next_sibling() {
748                break;
749            }
750        }
751        cursor.goto_parent();
752    }
753}