Skip to main content

cha_parser/
c_lang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Parser for C sources; see its [`LanguageParser`] implementation.
///
/// The type carries no state, so it is cheap to copy and to default-construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct CParser;
/// Parser for C++ sources; see its [`LanguageParser`] implementation.
///
/// The type carries no state, so it is cheap to copy and to default-construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct CppParser;
11
12impl LanguageParser for CParser {
13    fn language_name(&self) -> &str {
14        "c"
15    }
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
18    }
19}
20
21impl LanguageParser for CppParser {
22    fn language_name(&self) -> &str {
23        "cpp"
24    }
25    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
26        parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
27    }
28}
29
30fn parse_c_like(
31    file: &SourceFile,
32    lang: &str,
33    language: &tree_sitter::Language,
34) -> Option<SourceModel> {
35    let mut parser = Parser::new();
36    parser.set_language(language).ok()?;
37    let tree = parser.parse(&file.content, None)?;
38    let root = tree.root_node();
39    let src = file.content.as_bytes();
40
41    let mut functions = Vec::new();
42    let mut classes = Vec::new();
43    let mut imports = Vec::new();
44    let mut type_aliases = Vec::new();
45
46    collect_top_level(
47        root,
48        src,
49        &mut functions,
50        &mut classes,
51        &mut imports,
52        &mut type_aliases,
53    );
54
55    // C OOP heuristic: if a function's first parameter is a pointer to a known
56    // struct type, count it as a method of that struct. This is the universal
57    // C OOP convention used by GLib/GObject, Linux kernel, lvgl, FreeRTOS, etc.
58    associate_methods(&functions, &mut classes);
59
60    if is_header_file(file) {
61        for f in &mut functions {
62            f.is_exported = true;
63        }
64    }
65
66    Some(SourceModel {
67        language: lang.into(),
68        total_lines: file.line_count(),
69        functions,
70        classes,
71        imports,
72        comments: collect_comments(root, src),
73        type_aliases,
74    })
75}
76
77fn is_header_file(file: &SourceFile) -> bool {
78    file.path
79        .extension()
80        .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
81}
82
83/// C OOP heuristic: associate free functions with structs (same-file).
84/// A function is a "method" if its first parameter is a pointer to that struct.
85fn associate_methods(functions: &[FunctionInfo], classes: &mut [ClassInfo]) {
86    for class in classes.iter_mut() {
87        let count = functions
88            .iter()
89            .filter(|f| {
90                f.parameter_types.first().is_some_and(|t| {
91                    t.contains('*') && t.split('*').next().unwrap_or("").trim() == class.name
92                })
93            })
94            .count();
95        if count > 0 {
96            class.method_count += count;
97            class.has_behavior = true;
98        }
99    }
100}
101
102// cha:ignore cognitive_complexity
103fn collect_top_level(
104    root: Node,
105    src: &[u8],
106    functions: &mut Vec<FunctionInfo>,
107    classes: &mut Vec<ClassInfo>,
108    imports: &mut Vec<ImportInfo>,
109    type_aliases: &mut Vec<(String, String)>,
110) {
111    let mut cursor = root.walk();
112    for child in root.children(&mut cursor) {
113        match child.kind() {
114            "function_definition" => {
115                // Heuristic: `class MACRO Name { ... };` is parsed by tree-sitter
116                // as a function_definition whose return type is a class_specifier.
117                // Detect this and extract as a class instead.
118                if let Some(c) = try_extract_macro_class(child, src) {
119                    classes.push(c);
120                } else if let Some(f) = extract_function(child, src) {
121                    functions.push(f);
122                }
123            }
124            "struct_specifier" | "class_specifier" => {
125                if let Some(c) = extract_class(child, src) {
126                    classes.push(c);
127                }
128            }
129            "type_definition" => {
130                extract_typedef_struct(child, src, classes, type_aliases);
131            }
132            "preproc_include" => {
133                if let Some(imp) = extract_include(child, src) {
134                    imports.push(imp);
135                }
136            }
137            _ => {
138                if child.child_count() > 0 {
139                    collect_top_level(child, src, functions, classes, imports, type_aliases);
140                }
141            }
142        }
143    }
144}
145
146fn extract_typedef_struct(
147    node: Node,
148    src: &[u8],
149    classes: &mut Vec<ClassInfo>,
150    type_aliases: &mut Vec<(String, String)>,
151) {
152    let mut inner = node.walk();
153    for sub in node.children(&mut inner) {
154        if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
155            continue;
156        }
157        let Some(mut c) = extract_class(sub, src) else {
158            continue;
159        };
160        let original_name = c.name.clone();
161        if c.name.is_empty()
162            && let Some(decl) = node.child_by_field_name("declarator")
163        {
164            c.name = node_text(decl, src).to_string();
165        }
166        // Record typedef alias: e.g. typedef struct _X { ... } X;
167        // original_name = "_X", c.name might be updated to "X" if it was empty
168        if !original_name.is_empty()
169            && let Some(decl) = node.child_by_field_name("declarator")
170        {
171            let alias = node_text(decl, src).to_string();
172            if alias != original_name {
173                type_aliases.push((alias, original_name));
174            }
175        }
176        if !c.name.is_empty() {
177            classes.push(c);
178        }
179    }
180}
181
182/// Detect `class MACRO ClassName { ... };` misparse.
183/// tree-sitter sees this as function_definition with class_specifier return type.
184fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
185    let mut has_class_spec = false;
186    let mut cursor = node.walk();
187    for child in node.children(&mut cursor) {
188        if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
189            has_class_spec = true;
190        }
191    }
192    if !has_class_spec {
193        return None;
194    }
195    // The real class name is the "identifier" child (what tree-sitter thinks is the func name)
196    let name_node = node
197        .child_by_field_name("declarator")
198        .filter(|d| d.kind() == "identifier")?;
199    let name = node_text(name_node, src).to_string();
200    let name_col = name_node.start_position().column;
201    let name_end_col = name_node.end_position().column;
202    let body = node.child_by_field_name("body")?;
203    let start_line = node.start_position().row + 1;
204    let end_line = node.end_position().row + 1;
205    let method_count = count_methods(body);
206    let (field_names, field_types, first_field_type) = extract_field_info(body, src);
207
208    // Find parent from the class_specifier's base_class_clause if present
209    let parent_name = first_field_type;
210
211    Some(ClassInfo {
212        name,
213        start_line,
214        end_line,
215        name_col,
216        name_end_col,
217        line_count: end_line - start_line + 1,
218        method_count,
219        is_exported: true,
220        delegating_method_count: 0,
221        field_count: field_names.len(),
222        field_names,
223        field_types,
224        has_behavior: method_count > 0,
225        is_interface: false,
226        parent_name,
227        override_count: 0,
228        self_call_count: 0,
229        has_listener_field: false,
230        has_notify_method: false,
231    })
232}
233
234fn extract_function(node: Node, src: &[u8]) -> Option<FunctionInfo> {
235    let declarator = node.child_by_field_name("declarator")?;
236    let name_node = find_func_name_node(declarator)?;
237    let name = node_text(name_node, src).to_string();
238    let name_col = name_node.start_position().column;
239    let name_end_col = name_node.end_position().column;
240    let start_line = node.start_position().row + 1;
241    let end_line = node.end_position().row + 1;
242    let body = node.child_by_field_name("body");
243    let (param_count, param_types) = extract_params(declarator, src);
244    let is_static = has_storage_class(node, src, "static");
245
246    Some(FunctionInfo {
247        name,
248        start_line,
249        end_line,
250        name_col,
251        name_end_col,
252        line_count: end_line - start_line + 1,
253        complexity: count_complexity(node),
254        body_hash: body.map(hash_ast),
255        is_exported: !is_static,
256        parameter_count: param_count,
257        parameter_types: param_types,
258        chain_depth: body.map(max_chain_depth).unwrap_or(0),
259        switch_arms: body.map(count_case_labels).unwrap_or(0),
260        external_refs: body
261            .map(|b| collect_external_refs_c(b, src))
262            .unwrap_or_default(),
263        is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
264        comment_lines: count_comment_lines(node, src),
265        referenced_fields: body
266            .map(|b| collect_field_refs_c(b, src))
267            .unwrap_or_default(),
268        null_check_fields: body
269            .map(|b| collect_null_checks_c(b, src))
270            .unwrap_or_default(),
271        switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
272        optional_param_count: 0,
273        called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
274        cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
275    })
276}
277
278/// Check if a declaration node has a specific storage class specifier (e.g. "static").
279fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
280    for i in 0..node.child_count() {
281        if let Some(child) = node.child(i)
282            && child.kind() == "storage_class_specifier"
283            && node_text(child, src) == keyword
284        {
285            return true;
286        }
287    }
288    false
289}
290
291fn find_func_name_node(declarator: Node) -> Option<Node> {
292    if declarator.kind() == "identifier" {
293        return Some(declarator);
294    }
295    declarator
296        .child_by_field_name("declarator")
297        .and_then(find_func_name_node)
298}
299
300fn extract_params(declarator: Node, src: &[u8]) -> (usize, Vec<String>) {
301    let params = match declarator.child_by_field_name("parameters") {
302        Some(p) => p,
303        None => return (0, vec![]),
304    };
305    let mut count = 0;
306    let mut types = Vec::new();
307    let mut cursor = params.walk();
308    for child in params.children(&mut cursor) {
309        if child.kind() == "parameter_declaration" {
310            count += 1;
311            let base = child
312                .child_by_field_name("type")
313                .map(|t| node_text(t, src).to_string())
314                .unwrap_or_else(|| "int".into());
315            let is_ptr = child
316                .child_by_field_name("declarator")
317                .is_some_and(|d| d.kind() == "pointer_declarator");
318            types.push(if is_ptr { format!("{base} *") } else { base });
319        }
320    }
321    (count, types)
322}
323
324fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
325    let name_node = node.child_by_field_name("name");
326    let name = name_node
327        .map(|n| node_text(n, src).to_string())
328        .unwrap_or_default();
329    let name_col = name_node.map(|n| n.start_position().column).unwrap_or(0);
330    let name_end_col = name_node.map(|n| n.end_position().column).unwrap_or(0);
331    let start_line = node.start_position().row + 1;
332    let end_line = node.end_position().row + 1;
333    let body = node.child_by_field_name("body");
334    let method_count = body.map(count_methods).unwrap_or(0);
335    let (field_names, field_types, first_field_type) =
336        body.map(|b| extract_field_info(b, src)).unwrap_or_default();
337
338    Some(ClassInfo {
339        name,
340        start_line,
341        end_line,
342        name_col,
343        name_end_col,
344        line_count: end_line - start_line + 1,
345        method_count,
346        is_exported: true,
347        delegating_method_count: 0,
348        field_count: field_names.len(),
349        field_names,
350        field_types,
351        has_behavior: method_count > 0,
352        is_interface: false,
353        // First field type stored as parent candidate;
354        // build_class_graph validates against known struct names.
355        parent_name: first_field_type,
356        override_count: 0,
357        self_call_count: 0,
358        has_listener_field: false,
359        has_notify_method: false,
360    })
361}
362
363fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
364    let mut names = Vec::new();
365    let mut types = Vec::new();
366    let mut first_type = None;
367    let mut cursor = body.walk();
368    for child in body.children(&mut cursor) {
369        if child.kind() == "field_declaration" {
370            if let Some(decl) = child.child_by_field_name("declarator") {
371                names.push(node_text(decl, src).to_string());
372            }
373            let ty = child
374                .child_by_field_name("type")
375                .map(|t| node_text(t, src).to_string());
376            if first_type.is_none() {
377                first_type = ty.clone();
378            }
379            types.push(ty.unwrap_or_default());
380        }
381    }
382    (names, types, first_type)
383}
384
385fn count_methods(body: Node) -> usize {
386    let mut count = 0;
387    let mut cursor = body.walk();
388    for child in body.children(&mut cursor) {
389        if child.kind() == "function_definition" || child.kind() == "declaration" {
390            count += 1;
391        }
392    }
393    count
394}
395
396fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
397    let path = node.child_by_field_name("path")?;
398    let text = node_text(path, src)
399        .trim_matches(|c| c == '"' || c == '<' || c == '>')
400        .to_string();
401    Some(ImportInfo {
402        source: text,
403        line: node.start_position().row + 1,
404        col: node.start_position().column,
405        ..Default::default()
406    })
407}
408
409fn count_complexity(node: Node) -> usize {
410    let mut c = 1usize;
411    let mut cursor = node.walk();
412    visit_all(node, &mut cursor, &mut |n| match n.kind() {
413        "if_statement"
414        | "for_statement"
415        | "while_statement"
416        | "do_statement"
417        | "case_statement"
418        | "catch_clause"
419        | "conditional_expression" => c += 1,
420        "binary_expression" => {
421            if let Some(op) = n.child_by_field_name("operator") {
422                let kind = op.kind();
423                if kind == "&&" || kind == "||" {
424                    c += 1;
425                }
426            }
427        }
428        _ => {}
429    });
430    c
431}
432
433fn max_chain_depth(node: Node) -> usize {
434    let mut max = 0;
435    let mut cursor = node.walk();
436    visit_all(node, &mut cursor, &mut |n| {
437        if n.kind() == "field_expression" {
438            let d = chain_len(n);
439            if d > max {
440                max = d;
441            }
442        }
443    });
444    max
445}
446
447fn chain_len(node: Node) -> usize {
448    let mut depth = 0;
449    let mut current = node;
450    while current.kind() == "field_expression" || current.kind() == "call_expression" {
451        if current.kind() == "field_expression" {
452            depth += 1;
453        }
454        match current.child(0) {
455            Some(c) => current = c,
456            None => break,
457        }
458    }
459    depth
460}
461
462fn count_case_labels(node: Node) -> usize {
463    let mut count = 0;
464    let mut cursor = node.walk();
465    visit_all(node, &mut cursor, &mut |n| {
466        if n.kind() == "case_statement" {
467            count += 1;
468        }
469    });
470    count
471}
472
473fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
474    let mut score = 0;
475    cc_walk_c(node, 0, &mut score);
476    score
477}
478
479fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
480    match node.kind() {
481        "if_statement" => {
482            *score += 1 + nesting;
483            cc_children_c(node, nesting + 1, score);
484            return;
485        }
486        "for_statement" | "while_statement" | "do_statement" => {
487            *score += 1 + nesting;
488            cc_children_c(node, nesting + 1, score);
489            return;
490        }
491        "switch_statement" => {
492            *score += 1 + nesting;
493            cc_children_c(node, nesting + 1, score);
494            return;
495        }
496        "else_clause" => {
497            *score += 1;
498        }
499        "binary_expression" => {
500            if let Some(op) = node.child_by_field_name("operator")
501                && (op.kind() == "&&" || op.kind() == "||")
502            {
503                *score += 1;
504            }
505        }
506        "catch_clause" => {
507            *score += 1 + nesting;
508            cc_children_c(node, nesting + 1, score);
509            return;
510        }
511        _ => {}
512    }
513    cc_children_c(node, nesting, score);
514}
515
516fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
517    let mut cursor = node.walk();
518    for child in node.children(&mut cursor) {
519        cc_walk_c(child, nesting, score);
520    }
521}
522
523fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
524    let mut refs = Vec::new();
525    let mut cursor = body.walk();
526    visit_all(body, &mut cursor, &mut |n| {
527        if n.kind() == "field_expression"
528            && let Some(obj) = n.child(0)
529            && obj.kind() == "identifier"
530        {
531            let name = node_text(obj, src).to_string();
532            if !refs.contains(&name) {
533                refs.push(name);
534            }
535        }
536    });
537    refs
538}
539
540fn check_delegating_c(body: Node, src: &[u8]) -> bool {
541    let mut cursor = body.walk();
542    let stmts: Vec<Node> = body
543        .children(&mut cursor)
544        .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
545        .collect();
546    if stmts.len() != 1 {
547        return false;
548    }
549    let stmt = stmts[0];
550    let call = match stmt.kind() {
551        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
552        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
553        _ => None,
554    };
555    call.and_then(|c| c.child(0))
556        .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
557}
558
559fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
560    let mut refs = Vec::new();
561    let mut cursor = body.walk();
562    visit_all(body, &mut cursor, &mut |n| {
563        if n.kind() == "field_expression"
564            && let Some(field) = n.child_by_field_name("field")
565        {
566            let name = node_text(field, src).to_string();
567            if !refs.contains(&name) {
568                refs.push(name);
569            }
570        }
571    });
572    refs
573}
574
575fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
576    let mut fields = Vec::new();
577    let mut cursor = body.walk();
578    visit_all(body, &mut cursor, &mut |n| {
579        if n.kind() == "binary_expression" {
580            let text = node_text(n, src);
581            if (text.contains("NULL") || text.contains("nullptr"))
582                && let Some(left) = n.child(0)
583            {
584                let name = node_text(left, src).to_string();
585                if !fields.contains(&name) {
586                    fields.push(name);
587                }
588            }
589        }
590    });
591    fields
592}
593
594fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
595    let mut cursor = body.walk();
596    let mut target = None;
597    visit_all(body, &mut cursor, &mut |n| {
598        if n.kind() == "switch_statement"
599            && target.is_none()
600            && let Some(cond) = n.child_by_field_name("condition")
601        {
602            target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
603        }
604    });
605    target
606}
607
608fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
609    let mut calls = Vec::new();
610    let mut cursor = body.walk();
611    visit_all(body, &mut cursor, &mut |n| {
612        if n.kind() == "call_expression"
613            && let Some(func) = n.child(0)
614        {
615            let name = node_text(func, src).to_string();
616            if !calls.contains(&name) {
617                calls.push(name);
618            }
619        }
620    });
621    calls
622}
623
624fn count_comment_lines(node: Node, src: &[u8]) -> usize {
625    let mut count = 0;
626    let mut cursor = node.walk();
627    visit_all(node, &mut cursor, &mut |n| {
628        if n.kind() == "comment" {
629            count += node_text(n, src).lines().count();
630        }
631    });
632    count
633}
634
635fn hash_ast(node: Node) -> u64 {
636    let mut hasher = DefaultHasher::new();
637    hash_node(node, &mut hasher);
638    hasher.finish()
639}
640
641fn hash_node(node: Node, hasher: &mut DefaultHasher) {
642    node.kind().hash(hasher);
643    let mut cursor = node.walk();
644    for child in node.children(&mut cursor) {
645        hash_node(child, hasher);
646    }
647}
648
649fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
650    node.utf8_text(src).unwrap_or("")
651}
652
653fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
654    let mut comments = Vec::new();
655    let mut cursor = root.walk();
656    visit_all(root, &mut cursor, &mut |n| {
657        if n.kind().contains("comment") {
658            comments.push(cha_core::CommentInfo {
659                text: node_text(n, src).to_string(),
660                line: n.start_position().row + 1,
661            });
662        }
663    });
664    comments
665}
666
667fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
668    f(node);
669    if cursor.goto_first_child() {
670        loop {
671            let child_node = cursor.node();
672            let mut child_cursor = child_node.walk();
673            visit_all(child_node, &mut child_cursor, f);
674            if !cursor.goto_next_sibling() {
675                break;
676            }
677        }
678        cursor.goto_parent();
679    }
680}