Skip to main content

cha_parser/
c_lang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Parser for C sources; its `LanguageParser` impl reports the language as `"c"`.
pub struct CParser;
/// Parser for C++ sources; its `LanguageParser` impl reports the language as `"cpp"`.
pub struct CppParser;
11
12impl LanguageParser for CParser {
13    fn language_name(&self) -> &str {
14        "c"
15    }
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18    }
19}
20
21impl LanguageParser for CppParser {
22    fn language_name(&self) -> &str {
23        "cpp"
24    }
25    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26        parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27    }
28}
29
30fn parse_c_like(
31    file: &SourceFile,
32    lang: &str,
33    language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35    let mut parser = Parser::new();
36    parser.set_language(language).ok()?;
37    let tree = parser.parse(&file.content, None)?;
38    let root = tree.root_node();
39    let src = file.content.as_bytes();
40
41    let mut functions = Vec::new();
42    let mut classes = Vec::new();
43    let mut imports = Vec::new();
44    let mut type_aliases = Vec::new();
45
46    collect_top_level(
47        root,
48        src,
49        &mut functions,
50        &mut classes,
51        &mut imports,
52        &mut type_aliases,
53    );
54
55    // C OOP heuristic: if a function's first parameter is a pointer to a known
56    // struct type, count it as a method of that struct. This is the universal
57    // C OOP convention used by GLib/GObject, Linux kernel, lvgl, FreeRTOS, etc.
58    associate_methods(&functions, &mut classes);
59
60    if is_header_file(file) {
61        for f in &mut functions {
62            f.is_exported = true;
63        }
64    }
65
66    Some(SourceModel {
67        language: lang.into(),
68        total_lines: file.line_count(),
69        functions,
70        classes,
71        imports,
72        comments: collect_comments(root, src),
73        type_aliases,
74    })
75}
76
77fn is_header_file(file: &SourceFile) -> bool {
78    file.path
79        .extension()
80        .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
81}
82
83/// C OOP heuristic: associate free functions with structs (same-file).
84/// A function is a "method" if its first parameter is a pointer to that struct.
85fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
86    for class in classes.iter_mut() {
87        let count = functions
88            .iter()
89            .filter(|f| {
90                f.parameter_types.first().is_some_and(|t| {
91                    t.contains('*') && t.split('*').next().unwrap_or("").trim() == class.name
92                })
93            })
94            .count();
95        if count > 0 {
96            class.method_count += count;
97            class.has_behavior = true;
98        }
99    }
100}
101
102// cha:ignore cognitive_complexity
103fn collect_top_level(
104    root: Node,
105    src: &[u8],
106    functions: &mut Vec<FunctionInfo>,
107    classes: &mut Vec<ClassInfo>,
108    imports: &mut Vec<ImportInfo>,
109    type_aliases: &mut Vec<(String, String)>,
110) {
111    let mut cursor = root.walk();
112    for child in root.children(&mut cursor) {
113        match child.kind() {
114            "function_definition" => {
115                // Heuristic: `class MACRO Name { ... };` is parsed by tree-sitter
116                // as a function_definition whose return type is a class_specifier.
117                // Detect this and extract as a class instead.
118                if let Some(c) = try_extract_macro_class(child, src) {
119                    classes.push(c);
120                } else if let Some(f) = extract_function(child, src) {
121                    functions.push(f);
122                }
123            }
124            "struct_specifier" | "class_specifier" => {
125                if let Some(c) = extract_class(child, src) {
126                    classes.push(c);
127                }
128            }
129            "type_definition" => {
130                extract_typedef_struct(child, src, classes, type_aliases);
131            }
132            "preproc_include" => {
133                if let Some(imp) = extract_include(child, src) {
134                    imports.push(imp);
135                }
136            }
137            _ => {
138                if child.child_count() > 0 {
139                    collect_top_level(child, src, functions, classes, imports, type_aliases);
140                }
141            }
142        }
143    }
144}
145
146fn extract_typedef_struct(
147    node: Node,
148    src: &[u8],
149    classes: &mut Vec<ClassInfo>,
150    type_aliases: &mut Vec<(String, String)>,
151) {
152    let mut inner = node.walk();
153    for sub in node.children(&mut inner) {
154        if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
155            continue;
156        }
157        let Some(mut c) = extract_class(sub, src) else {
158            continue;
159        };
160        let original_name = c.name.clone();
161        if c.name.is_empty()
162            && let Some(decl) = node.child_by_field_name("declarator")
163        {
164            c.name = node_text(decl, src).to_string();
165        }
166        // Record typedef alias: e.g. typedef struct _X { ... } X;
167        // original_name = "_X", c.name might be updated to "X" if it was empty
168        if !original_name.is_empty()
169            && let Some(decl) = node.child_by_field_name("declarator")
170        {
171            let alias = node_text(decl, src).to_string();
172            if alias != original_name {
173                type_aliases.push((alias, original_name));
174            }
175        }
176        if !c.name.is_empty() {
177            classes.push(c);
178        }
179    }
180}
181
182/// Detect `class MACRO ClassName { ... };` misparse.
183/// tree-sitter sees this as function_definition with class_specifier return type.
184fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
185    let mut has_class_spec = false;
186    let mut cursor = node.walk();
187    for child in node.children(&mut cursor) {
188        if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
189            has_class_spec = true;
190        }
191    }
192    if !has_class_spec {
193        return None;
194    }
195    // The real class name is the "identifier" child (what tree-sitter thinks is the func name)
196    let name = node
197        .child_by_field_name("declarator")
198        .filter(|d| d.kind() == "identifier")
199        .map(|d| node_text(d, src).to_string())?;
200    let body = node.child_by_field_name("body")?;
201    let start_line = node.start_position().row + 1;
202    let end_line = node.end_position().row + 1;
203    let method_count = count_methods(body);
204    let (field_names, field_types, first_field_type) = extract_field_info(body, src);
205
206    // Find parent from the class_specifier's base_class_clause if present
207    let parent_name = first_field_type;
208
209    Some(ClassInfo {
210        name,
211        start_line,
212        end_line,
213        line_count: end_line - start_line + 1,
214        method_count,
215        is_exported: true,
216        delegating_method_count: 0,
217        field_count: field_names.len(),
218        field_names,
219        field_types,
220        has_behavior: method_count > 0,
221        is_interface: false,
222        parent_name,
223        override_count: 0,
224        self_call_count: 0,
225        has_listener_field: false,
226        has_notify_method: false,
227    })
228}
229
230fn extract_function(node: Node, src: &[u8]) -> Option<FunctionInfo> {
231    let declarator = node.child_by_field_name("declarator")?;
232    let name = find_func_name(declarator, src)?.to_string();
233    let start_line = node.start_position().row + 1;
234    let end_line = node.end_position().row + 1;
235    let body = node.child_by_field_name("body");
236    let (param_count, param_types) = extract_params(declarator, src);
237    let is_static = has_storage_class(node, src, "static");
238
239    Some(FunctionInfo {
240        name,
241        start_line,
242        end_line,
243        line_count: end_line - start_line + 1,
244        complexity: count_complexity(node),
245        body_hash: body.map(hash_ast),
246        is_exported: !is_static,
247        parameter_count: param_count,
248        parameter_types: param_types,
249        chain_depth: body.map(max_chain_depth).unwrap_or(0),
250        switch_arms: body.map(count_case_labels).unwrap_or(0),
251        external_refs: body
252            .map(|b| collect_external_refs_c(b, src))
253            .unwrap_or_default(),
254        is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
255        comment_lines: count_comment_lines(node, src),
256        referenced_fields: body
257            .map(|b| collect_field_refs_c(b, src))
258            .unwrap_or_default(),
259        null_check_fields: body
260            .map(|b| collect_null_checks_c(b, src))
261            .unwrap_or_default(),
262        switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
263        optional_param_count: 0,
264        called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
265        cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
266    })
267}
268
269/// Check if a declaration node has a specific storage class specifier (e.g. "static").
270fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
271    for i in 0..node.child_count() {
272        if let Some(child) = node.child(i)
273            && child.kind() == "storage_class_specifier"
274            && node_text(child, src) == keyword
275        {
276            return true;
277        }
278    }
279    false
280}
281
282fn find_func_name<'a>(declarator: Node<'a>, src: &'a [u8]) -> Option<&'a str> {
283    // function_declarator -> declarator (identifier or qualified_identifier)
284    if declarator.kind() == "identifier" {
285        return Some(node_text(declarator, src));
286    }
287    declarator
288        .child_by_field_name("declarator")
289        .and_then(|d| find_func_name(d, src))
290}
291
292fn extract_params(declarator: Node, src: &[u8]) -> (usize, Vec<String>) {
293    let params = match declarator.child_by_field_name("parameters") {
294        Some(p) => p,
295        None => return (0, vec![]),
296    };
297    let mut count = 0;
298    let mut types = Vec::new();
299    let mut cursor = params.walk();
300    for child in params.children(&mut cursor) {
301        if child.kind() == "parameter_declaration" {
302            count += 1;
303            let base = child
304                .child_by_field_name("type")
305                .map(|t| node_text(t, src).to_string())
306                .unwrap_or_else(|| "int".into());
307            let is_ptr = child
308                .child_by_field_name("declarator")
309                .is_some_and(|d| d.kind() == "pointer_declarator");
310            types.push(if is_ptr { format!("{base} *") } else { base });
311        }
312    }
313    (count, types)
314}
315
316fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
317    let name = node
318        .child_by_field_name("name")
319        .map(|n| node_text(n, src).to_string())
320        .unwrap_or_default();
321    let start_line = node.start_position().row + 1;
322    let end_line = node.end_position().row + 1;
323    let body = node.child_by_field_name("body");
324    let method_count = body.map(count_methods).unwrap_or(0);
325    let (field_names, field_types, first_field_type) =
326        body.map(|b| extract_field_info(b, src)).unwrap_or_default();
327
328    Some(ClassInfo {
329        name,
330        start_line,
331        end_line,
332        line_count: end_line - start_line + 1,
333        method_count,
334        is_exported: true,
335        delegating_method_count: 0,
336        field_count: field_names.len(),
337        field_names,
338        field_types,
339        has_behavior: method_count > 0,
340        is_interface: false,
341        // First field type stored as parent candidate;
342        // build_class_graph validates against known struct names.
343        parent_name: first_field_type,
344        override_count: 0,
345        self_call_count: 0,
346        has_listener_field: false,
347        has_notify_method: false,
348    })
349}
350
351fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
352    let mut names = Vec::new();
353    let mut types = Vec::new();
354    let mut first_type = None;
355    let mut cursor = body.walk();
356    for child in body.children(&mut cursor) {
357        if child.kind() == "field_declaration" {
358            if let Some(decl) = child.child_by_field_name("declarator") {
359                names.push(node_text(decl, src).to_string());
360            }
361            let ty = child
362                .child_by_field_name("type")
363                .map(|t| node_text(t, src).to_string());
364            if first_type.is_none() {
365                first_type = ty.clone();
366            }
367            types.push(ty.unwrap_or_default());
368        }
369    }
370    (names, types, first_type)
371}
372
373fn count_methods(body: Node) -> usize {
374    let mut count = 0;
375    let mut cursor = body.walk();
376    for child in body.children(&mut cursor) {
377        if child.kind() == "function_definition" || child.kind() == "declaration" {
378            count += 1;
379        }
380    }
381    count
382}
383
384fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
385    let path = node.child_by_field_name("path")?;
386    let text = node_text(path, src)
387        .trim_matches(|c| c == '"' || c == '<' || c == '>')
388        .to_string();
389    Some(ImportInfo {
390        source: text,
391        line: node.start_position().row + 1,
392    })
393}
394
395fn count_complexity(node: Node) -> usize {
396    let mut c = 1usize;
397    let mut cursor = node.walk();
398    visit_all(node, &mut cursor, &mut |n| match n.kind() {
399        "if_statement"
400        | "for_statement"
401        | "while_statement"
402        | "do_statement"
403        | "case_statement"
404        | "catch_clause"
405        | "conditional_expression" => c += 1,
406        "binary_expression" => {
407            if let Some(op) = n.child_by_field_name("operator") {
408                let kind = op.kind();
409                if kind == "&&" || kind == "||" {
410                    c += 1;
411                }
412            }
413        }
414        _ => {}
415    });
416    c
417}
418
419fn max_chain_depth(node: Node) -> usize {
420    let mut max = 0;
421    let mut cursor = node.walk();
422    visit_all(node, &mut cursor, &mut |n| {
423        if n.kind() == "field_expression" {
424            let d = chain_len(n);
425            if d > max {
426                max = d;
427            }
428        }
429    });
430    max
431}
432
433fn chain_len(node: Node) -> usize {
434    let mut depth = 0;
435    let mut current = node;
436    while current.kind() == "field_expression" || current.kind() == "call_expression" {
437        if current.kind() == "field_expression" {
438            depth += 1;
439        }
440        match current.child(0) {
441            Some(c) => current = c,
442            None => break,
443        }
444    }
445    depth
446}
447
448fn count_case_labels(node: Node) -> usize {
449    let mut count = 0;
450    let mut cursor = node.walk();
451    visit_all(node, &mut cursor, &mut |n| {
452        if n.kind() == "case_statement" {
453            count += 1;
454        }
455    });
456    count
457}
458
459fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
460    let mut score = 0;
461    cc_walk_c(node, 0, &mut score);
462    score
463}
464
465fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
466    match node.kind() {
467        "if_statement" => {
468            *score += 1 + nesting;
469            cc_children_c(node, nesting + 1, score);
470            return;
471        }
472        "for_statement" | "while_statement" | "do_statement" => {
473            *score += 1 + nesting;
474            cc_children_c(node, nesting + 1, score);
475            return;
476        }
477        "switch_statement" => {
478            *score += 1 + nesting;
479            cc_children_c(node, nesting + 1, score);
480            return;
481        }
482        "else_clause" => {
483            *score += 1;
484        }
485        "binary_expression" => {
486            if let Some(op) = node.child_by_field_name("operator")
487                && (op.kind() == "&&" || op.kind() == "||")
488            {
489                *score += 1;
490            }
491        }
492        "catch_clause" => {
493            *score += 1 + nesting;
494            cc_children_c(node, nesting + 1, score);
495            return;
496        }
497        _ => {}
498    }
499    cc_children_c(node, nesting, score);
500}
501
502fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
503    let mut cursor = node.walk();
504    for child in node.children(&mut cursor) {
505        cc_walk_c(child, nesting, score);
506    }
507}
508
509fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
510    let mut refs = Vec::new();
511    let mut cursor = body.walk();
512    visit_all(body, &mut cursor, &mut |n| {
513        if n.kind() == "field_expression"
514            && let Some(obj) = n.child(0)
515            && obj.kind() == "identifier"
516        {
517            let name = node_text(obj, src).to_string();
518            if !refs.contains(&name) {
519                refs.push(name);
520            }
521        }
522    });
523    refs
524}
525
526fn check_delegating_c(body: Node, src: &[u8]) -> bool {
527    let mut cursor = body.walk();
528    let stmts: Vec<Node> = body
529        .children(&mut cursor)
530        .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
531        .collect();
532    if stmts.len() != 1 {
533        return false;
534    }
535    let stmt = stmts[0];
536    let call = match stmt.kind() {
537        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
538        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
539        _ => None,
540    };
541    call.and_then(|c| c.child(0))
542        .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
543}
544
545fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
546    let mut refs = Vec::new();
547    let mut cursor = body.walk();
548    visit_all(body, &mut cursor, &mut |n| {
549        if n.kind() == "field_expression"
550            && let Some(field) = n.child_by_field_name("field")
551        {
552            let name = node_text(field, src).to_string();
553            if !refs.contains(&name) {
554                refs.push(name);
555            }
556        }
557    });
558    refs
559}
560
561fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
562    let mut fields = Vec::new();
563    let mut cursor = body.walk();
564    visit_all(body, &mut cursor, &mut |n| {
565        if n.kind() == "binary_expression" {
566            let text = node_text(n, src);
567            if (text.contains("NULL") || text.contains("nullptr"))
568                && let Some(left) = n.child(0)
569            {
570                let name = node_text(left, src).to_string();
571                if !fields.contains(&name) {
572                    fields.push(name);
573                }
574            }
575        }
576    });
577    fields
578}
579
580fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
581    let mut cursor = body.walk();
582    let mut target = None;
583    visit_all(body, &mut cursor, &mut |n| {
584        if n.kind() == "switch_statement"
585            && target.is_none()
586            && let Some(cond) = n.child_by_field_name("condition")
587        {
588            target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
589        }
590    });
591    target
592}
593
594fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
595    let mut calls = Vec::new();
596    let mut cursor = body.walk();
597    visit_all(body, &mut cursor, &mut |n| {
598        if n.kind() == "call_expression"
599            && let Some(func) = n.child(0)
600        {
601            let name = node_text(func, src).to_string();
602            if !calls.contains(&name) {
603                calls.push(name);
604            }
605        }
606    });
607    calls
608}
609
610fn count_comment_lines(node: Node, src: &[u8]) -> usize {
611    let mut count = 0;
612    let mut cursor = node.walk();
613    visit_all(node, &mut cursor, &mut |n| {
614        if n.kind() == "comment" {
615            count += node_text(n, src).lines().count();
616        }
617    });
618    count
619}
620
621fn hash_ast(node: Node) -> u64 {
622    let mut hasher = DefaultHasher::new();
623    hash_node(node, &mut hasher);
624    hasher.finish()
625}
626
627fn hash_node(node: Node, hasher: &mut DefaultHasher) {
628    node.kind().hash(hasher);
629    let mut cursor = node.walk();
630    for child in node.children(&mut cursor) {
631        hash_node(child, hasher);
632    }
633}
634
635fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
636    node.utf8_text(src).unwrap_or("")
637}
638
639fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
640    let mut comments = Vec::new();
641    let mut cursor = root.walk();
642    visit_all(root, &mut cursor, &mut |n| {
643        if n.kind().contains("comment") {
644            comments.push(cha_core::CommentInfo {
645                text: node_text(n, src).to_string(),
646                line: n.start_position().row + 1,
647            });
648        }
649    });
650    comments
651}
652
653fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
654    f(node);
655    if cursor.goto_first_child() {
656        loop {
657            let child_node = cursor.node();
658            let mut child_cursor = child_node.walk();
659            visit_all(child_node, &mut child_cursor, f);
660            if !cursor.goto_next_sibling() {
661                break;
662            }
663        }
664        cursor.goto_parent();
665    }
666}