Skip to main content

cha_parser/
c_lang.rs

1// cha:ignore large_file
2use std::collections::hash_map::DefaultHasher;
3use std::hash::{Hash, Hasher};
4
5use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
6use tree_sitter::{Node, Parser};
7
8use crate::LanguageParser;
9
/// Stateless marker type whose `LanguageParser` implementation handles
/// plain C sources using the `tree_sitter_c` grammar.
#[derive(Debug, Clone, Copy, Default)]
pub struct CParser;
/// Stateless marker type whose `LanguageParser` implementation handles
/// C++ sources using the `tree_sitter_cpp` grammar.
#[derive(Debug, Clone, Copy, Default)]
pub struct CppParser;
12
13impl LanguageParser for CParser {
14    fn language_name(&self) -> &str {
15        "c"
16    }
17    fn ts_language(&self) -> tree_sitter::Language {
18        tree_sitter_c::LANGUAGE.into()
19    }
20    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
21        parse_c_like(file, "c", &tree_sitter_c::LANGUAGE.into())
22    }
23}
24
25impl LanguageParser for CppParser {
26    fn language_name(&self) -> &str {
27        "cpp"
28    }
29    fn ts_language(&self) -> tree_sitter::Language {
30        tree_sitter_cpp::LANGUAGE.into()
31    }
32    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
33        parse_c_like(file, "cpp", &tree_sitter_cpp::LANGUAGE.into())
34    }
35}
36
37fn parse_c_like(
38    file: &SourceFile,
39    lang: &str,
40    language: &tree_sitter::Language,
41) -> Option<SourceModel> {
42    let mut parser = Parser::new();
43    parser.set_language(language).ok()?;
44    let tree = parser.parse(&file.content, None)?;
45    let root = tree.root_node();
46    let src = file.content.as_bytes();
47
48    let mut functions = Vec::new();
49    let mut classes = Vec::new();
50    let mut imports = Vec::new();
51    let mut type_aliases = Vec::new();
52
53    let imports_map = crate::c_imports::build(root, src);
54    collect_top_level(
55        root,
56        src,
57        &imports_map,
58        &mut functions,
59        &mut classes,
60        &mut imports,
61        &mut type_aliases,
62    );
63
64    // (C OOP method attribution moved to `cha-cli::c_oop_enrich`, which has
65    // cross-file visibility — a function in `foo.c` can be attributed to a
66    // struct declared in `foo.h`.)
67
68    if is_header_file(file) {
69        for f in &mut functions {
70            f.is_exported = true;
71        }
72    }
73
74    Some(SourceModel {
75        language: lang.into(),
76        total_lines: file.line_count(),
77        functions,
78        classes,
79        imports,
80        comments: collect_comments(root, src),
81        type_aliases,
82    })
83}
84
85fn is_header_file(file: &SourceFile) -> bool {
86    file.path
87        .extension()
88        .is_some_and(|e| e == "h" || e == "hxx" || e == "hpp")
89}
90
91// cha:ignore cognitive_complexity
92// cha:ignore high_complexity
93fn collect_top_level(
94    root: Node,
95    src: &[u8],
96    imports_map: &crate::type_ref::ImportsMap,
97    functions: &mut Vec<FunctionInfo>,
98    classes: &mut Vec<ClassInfo>,
99    imports: &mut Vec<ImportInfo>,
100    type_aliases: &mut Vec<(String, String)>,
101) {
102    let mut cursor = root.walk();
103    for child in root.children(&mut cursor) {
104        match child.kind() {
105            "function_definition" => {
106                handle_function_definition(child, src, imports_map, functions, classes);
107            }
108            "declaration" => {
109                // Header-style function declarations (`void foo(int);` — no
110                // body) surface as `declaration` nodes in tree-sitter-c. Pick
111                // out the function_declarator variants and treat them as
112                // functions so `.h` file contents contribute to the project
113                // API surface / method attribution. Non-function
114                // `declaration` (globals, typedefs, extern vars) have no
115                // function_declarator child and are skipped.
116                if has_function_declarator(child)
117                    && let Some(f) = extract_function(child, src, imports_map)
118                {
119                    functions.push(f);
120                }
121            }
122            "struct_specifier" | "class_specifier" => {
123                if let Some(c) = extract_class(child, src) {
124                    classes.push(c);
125                }
126            }
127            "type_definition" => {
128                extract_typedef_struct(child, src, classes, type_aliases);
129            }
130            "preproc_include" => {
131                if let Some(imp) = extract_include(child, src) {
132                    imports.push(imp);
133                }
134            }
135            // C++ nesting constructs: recurse into them so inner
136            // functions/classes land at the top of `functions`/`classes`.
137            // `namespace { ... }` and `extern "C" { ... }` wrap their
138            // content in a `body` field; `template <...>` applies to its
139            // following sibling node directly (no body field). We just
140            // recurse into the whole subtree either way.
141            "namespace_definition" | "linkage_specification" | "template_declaration" => {
142                collect_top_level(
143                    child,
144                    src,
145                    imports_map,
146                    functions,
147                    classes,
148                    imports,
149                    type_aliases,
150                );
151            }
152            _ => {
153                if child.child_count() > 0 {
154                    collect_top_level(
155                        child,
156                        src,
157                        imports_map,
158                        functions,
159                        classes,
160                        imports,
161                        type_aliases,
162                    );
163                }
164            }
165        }
166    }
167}
168
169/// Process a `function_definition` node. Handles two C-specific edge
170/// cases (the `class MACRO Name {...}` macro-class pattern and plain
171/// functions) plus C++ out-of-class method attribution.
172fn handle_function_definition(
173    node: Node,
174    src: &[u8],
175    imports_map: &crate::type_ref::ImportsMap,
176    functions: &mut Vec<FunctionInfo>,
177    classes: &mut Vec<ClassInfo>,
178) {
179    if let Some(c) = try_extract_macro_class(node, src) {
180        classes.push(c);
181        return;
182    }
183    let Some(f) = extract_function(node, src, imports_map) else {
184        return;
185    };
186    if let Some(q) = crate::cpp::extract_class_qualifier(node, src) {
187        crate::cpp::attach_to_class(&q, classes);
188    }
189    functions.push(f);
190}
191
192fn extract_typedef_struct(
193    node: Node,
194    src: &[u8],
195    classes: &mut Vec<ClassInfo>,
196    type_aliases: &mut Vec<(String, String)>,
197) {
198    let found_struct = register_typedef_struct_children(node, src, classes, type_aliases);
199    if !found_struct {
200        register_simple_typedef(node, src, type_aliases);
201    }
202}
203
204fn register_typedef_struct_children(
205    node: Node,
206    src: &[u8],
207    classes: &mut Vec<ClassInfo>,
208    type_aliases: &mut Vec<(String, String)>,
209) -> bool {
210    let mut found_struct = false;
211    let mut inner = node.walk();
212    for sub in node.children(&mut inner) {
213        if sub.kind() != "struct_specifier" && sub.kind() != "class_specifier" {
214            continue;
215        }
216        found_struct = true;
217        register_single_typedef_struct(node, sub, src, classes, type_aliases);
218    }
219    found_struct
220}
221
222fn register_single_typedef_struct(
223    typedef: Node,
224    sub: Node,
225    src: &[u8],
226    classes: &mut Vec<ClassInfo>,
227    type_aliases: &mut Vec<(String, String)>,
228) {
229    let Some(mut c) = extract_class(sub, src) else {
230        return;
231    };
232    let original_name = c.name.clone();
233    if c.name.is_empty()
234        && let Some(decl) = typedef.child_by_field_name("declarator")
235    {
236        c.name = node_text(decl, src).to_string();
237    }
238    if !original_name.is_empty()
239        && let Some(decl) = typedef.child_by_field_name("declarator")
240    {
241        let alias = node_text(decl, src).to_string();
242        if alias != original_name {
243            type_aliases.push((alias, original_name));
244        }
245    }
246    if !c.name.is_empty() {
247        classes.push(c);
248    }
249}
250
251/// `typedef uint32_t tag_t;` — simple alias, no struct body.
252fn register_simple_typedef(node: Node, src: &[u8], type_aliases: &mut Vec<(String, String)>) {
253    let alias = extract_typedef_alias(node, src);
254    let original = node
255        .child_by_field_name("type")
256        .map(|t| node_text(t, src).trim().to_string())
257        .unwrap_or_default();
258    if !alias.is_empty() && alias != original {
259        type_aliases.push((alias, original));
260    }
261}
262
263/// Find the new type name in a `typedef <something> <name>;`. Tree-sitter-c
264/// sometimes puts the name behind the `declarator` field; other grammars
265/// (typedef of enum/union without body) emit a plain `type_identifier` as
266/// a top-level child. Try the field first, then the first type_identifier.
267fn extract_typedef_alias(node: Node, src: &[u8]) -> String {
268    if let Some(decl) = node.child_by_field_name("declarator") {
269        return node_text(decl, src).trim().to_string();
270    }
271    let mut cursor = node.walk();
272    for child in node.children(&mut cursor) {
273        if child.kind() == "type_identifier" {
274            return node_text(child, src).trim().to_string();
275        }
276    }
277    String::new()
278}
279
280/// Detect `class MACRO ClassName { ... };` misparse.
281/// tree-sitter sees this as function_definition with class_specifier return type.
282fn try_extract_macro_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
283    let mut has_class_spec = false;
284    let mut cursor = node.walk();
285    for child in node.children(&mut cursor) {
286        if child.kind() == "class_specifier" || child.kind() == "struct_specifier" {
287            has_class_spec = true;
288        }
289    }
290    if !has_class_spec {
291        return None;
292    }
293    // The real class name is the "identifier" child (what tree-sitter thinks is the func name)
294    let name_node = node
295        .child_by_field_name("declarator")
296        .filter(|d| d.kind() == "identifier")?;
297    let name = node_text(name_node, src).to_string();
298    let name_col = name_node.start_position().column;
299    let name_end_col = name_node.end_position().column;
300    let body = node.child_by_field_name("body")?;
301    let start_line = node.start_position().row + 1;
302    let end_line = node.end_position().row + 1;
303    let method_count = count_methods(body);
304    let (field_names, field_types, first_field_type) = extract_field_info(body, src);
305
306    // Find parent from the class_specifier's base_class_clause if present
307    let parent_name = first_field_type;
308
309    Some(ClassInfo {
310        name,
311        start_line,
312        end_line,
313        name_col,
314        name_end_col,
315        line_count: end_line - start_line + 1,
316        method_count,
317        is_exported: true,
318        delegating_method_count: 0,
319        field_count: field_names.len(),
320        field_names,
321        field_types,
322        has_behavior: method_count > 0,
323        is_interface: false,
324        parent_name,
325        override_count: 0,
326        self_call_count: 0,
327        has_listener_field: false,
328        has_notify_method: false,
329    })
330}
331
332fn extract_function(
333    node: Node,
334    src: &[u8],
335    imports_map: &crate::type_ref::ImportsMap,
336) -> Option<FunctionInfo> {
337    let declarator = node.child_by_field_name("declarator")?;
338    let name_node = find_func_name_node(declarator)?;
339    let name = node_text(name_node, src).to_string();
340    let name_col = name_node.start_position().column;
341    let name_end_col = name_node.end_position().column;
342    let start_line = node.start_position().row + 1;
343    let end_line = node.end_position().row + 1;
344    let body = node.child_by_field_name("body");
345    let (param_count, param_types, param_names) = extract_params(declarator, src, imports_map);
346    let is_static = has_storage_class(node, src, "static");
347
348    Some(FunctionInfo {
349        name,
350        start_line,
351        end_line,
352        name_col,
353        name_end_col,
354        line_count: end_line - start_line + 1,
355        complexity: count_complexity(node),
356        body_hash: body.map(hash_ast),
357        is_exported: !is_static,
358        parameter_count: param_count,
359        parameter_types: param_types,
360        parameter_names: param_names,
361        chain_depth: body.map(max_chain_depth).unwrap_or(0),
362        switch_arms: body.map(count_case_labels).unwrap_or(0),
363        switch_arm_values: body
364            .map(|b| collect_c_arm_values(b, src))
365            .unwrap_or_default(),
366        external_refs: body
367            .map(|b| collect_external_refs_c(b, src))
368            .unwrap_or_default(),
369        is_delegating: body.map(|b| check_delegating_c(b, src)).unwrap_or(false),
370        comment_lines: count_comment_lines(node, src),
371        referenced_fields: body
372            .map(|b| collect_field_refs_c(b, src))
373            .unwrap_or_default(),
374        null_check_fields: body
375            .map(|b| collect_null_checks_c(b, src))
376            .unwrap_or_default(),
377        switch_dispatch_target: body.and_then(|b| extract_switch_target_c(b, src)),
378        optional_param_count: 0,
379        called_functions: body.map(|b| collect_calls_c(b, src)).unwrap_or_default(),
380        cognitive_complexity: body.map(cognitive_complexity_c).unwrap_or(0),
381        return_type: extract_c_return_type(node, src, imports_map),
382    })
383}
384
385/// The C function's return type lives in the `type` field of the
386/// `function_definition` node. Pointer return types are indicated by the
387/// declarator having a `pointer_declarator`; prefix the type with ` *`
388/// so the `raw` text mirrors the written form.
389fn extract_c_return_type(
390    node: Node,
391    src: &[u8],
392    imports_map: &crate::type_ref::ImportsMap,
393) -> Option<cha_core::TypeRef> {
394    let ty = node.child_by_field_name("type")?;
395    let base = node_text(ty, src).trim().to_string();
396    let is_ptr = node
397        .child_by_field_name("declarator")
398        .is_some_and(|d| d.kind() == "pointer_declarator");
399    let raw = if is_ptr { format!("{base} *") } else { base };
400    Some(crate::type_ref::resolve(raw, imports_map))
401}
402
403/// Does this `declaration` node actually declare a function (as opposed
404/// to a variable / typedef / extern)? tree-sitter-c wraps function
405/// prototypes in `declaration` with a `function_declarator` descendant.
406fn has_function_declarator(node: Node) -> bool {
407    node.child_by_field_name("declarator")
408        .is_some_and(has_function_declarator_inside)
409}
410
411fn has_function_declarator_inside(node: Node) -> bool {
412    if node.kind() == "function_declarator" {
413        return true;
414    }
415    // Pointer return types wrap the declarator: `int *foo(...)` produces
416    // `pointer_declarator { function_declarator { ... } }`. Unwrap.
417    if let Some(inner) = node.child_by_field_name("declarator") {
418        return has_function_declarator_inside(inner);
419    }
420    false
421}
422
423/// Check if a declaration node has a specific storage class specifier (e.g. "static").
424fn has_storage_class(node: Node, src: &[u8], keyword: &str) -> bool {
425    for i in 0..node.child_count() {
426        if let Some(child) = node.child(i)
427            && child.kind() == "storage_class_specifier"
428            && node_text(child, src) == keyword
429        {
430            return true;
431        }
432    }
433    false
434}
435
436fn find_func_name_node(declarator: Node) -> Option<Node> {
437    // Plain C: declarator chain bottoms out in `identifier`. C++ adds:
438    // `field_identifier` (in-class member), `destructor_name`,
439    // `operator_name`, and `qualified_identifier` (out-of-class / global).
440    match declarator.kind() {
441        "identifier" | "field_identifier" | "destructor_name" | "operator_name" => {
442            return Some(declarator);
443        }
444        "qualified_identifier" => return crate::cpp::qualified_identifier_leaf(declarator),
445        _ => {}
446    }
447    // `pointer_declarator` has a `declarator` field; `reference_declarator`
448    // does not — its named children are the sub-declarator positionally.
449    // Fall back to the first named child if the field is absent.
450    let next = declarator
451        .child_by_field_name("declarator")
452        .or_else(|| first_named_child(declarator));
453    next.and_then(find_func_name_node)
454}
455
456fn first_named_child(node: Node) -> Option<Node> {
457    let mut c = node.walk();
458    node.children(&mut c).find(|n| n.is_named())
459}
460
461fn extract_params(
462    declarator: Node,
463    src: &[u8],
464    imports_map: &crate::type_ref::ImportsMap,
465) -> (usize, Vec<cha_core::TypeRef>, Vec<String>) {
466    let params = match declarator.child_by_field_name("parameters") {
467        Some(p) => p,
468        None => return (0, vec![], vec![]),
469    };
470    let mut count = 0;
471    let mut types = Vec::new();
472    let mut names = Vec::new();
473    let mut cursor = params.walk();
474    for child in params.children(&mut cursor) {
475        if child.kind() == "parameter_declaration" {
476            count += 1;
477            let base = child
478                .child_by_field_name("type")
479                .map(|t| node_text(t, src).to_string())
480                .unwrap_or_else(|| "int".into());
481            let decl = child.child_by_field_name("declarator");
482            let is_ptr = decl.is_some_and(|d| d.kind() == "pointer_declarator");
483            let raw = if is_ptr { format!("{base} *") } else { base };
484            types.push(crate::type_ref::resolve(raw, imports_map));
485            names.push(
486                decl.map(|d| crate::cpp::c_param_name(d, src))
487                    .unwrap_or_default(),
488            );
489        }
490    }
491    (count, types, names)
492}
493
494fn extract_class(node: Node, src: &[u8]) -> Option<ClassInfo> {
495    let (name, name_col, name_end_col) =
496        crate::cpp::class_name_triple(node.child_by_field_name("name"), src);
497    let start_line = node.start_position().row + 1;
498    let end_line = node.end_position().row + 1;
499    let body = node.child_by_field_name("body");
500    let method_count = body.map(count_methods).unwrap_or(0);
501    let (field_names, field_types, first_field_type) =
502        body.map(|b| extract_field_info(b, src)).unwrap_or_default();
503
504    // C++ inheritance (`class Derived : public Base`) takes precedence
505    // over the "first field looks like an embedded base struct" C heuristic.
506    // `crate::cpp::extract_cpp_base` returns `None` for plain C
507    // struct_specifier (no base_class_clause child), so the old behaviour
508    // is preserved where real inheritance syntax isn't present.
509    let parent_name = crate::cpp::extract_cpp_base(node, src).or(first_field_type);
510
511    Some(ClassInfo {
512        name,
513        start_line,
514        end_line,
515        name_col,
516        name_end_col,
517        line_count: end_line - start_line + 1,
518        method_count,
519        is_exported: true,
520        delegating_method_count: 0,
521        field_count: field_names.len(),
522        field_names,
523        field_types,
524        has_behavior: method_count > 0,
525        is_interface: false,
526        parent_name,
527        override_count: 0,
528        self_call_count: 0,
529        has_listener_field: false,
530        has_notify_method: false,
531    })
532}
533
534fn extract_field_info(body: Node, src: &[u8]) -> (Vec<String>, Vec<String>, Option<String>) {
535    let mut names = Vec::new();
536    let mut types = Vec::new();
537    let mut first_type = None;
538    let mut cursor = body.walk();
539    for child in body.children(&mut cursor) {
540        if child.kind() == "field_declaration" {
541            if let Some(decl) = child.child_by_field_name("declarator") {
542                names.push(node_text(decl, src).to_string());
543            }
544            let ty = child
545                .child_by_field_name("type")
546                .map(|t| node_text(t, src).to_string());
547            if first_type.is_none() {
548                first_type = ty.clone();
549            }
550            types.push(ty.unwrap_or_default());
551        }
552    }
553    (names, types, first_type)
554}
555
556fn count_methods(body: Node) -> usize {
557    let mut count = 0;
558    let mut cursor = body.walk();
559    for child in body.children(&mut cursor) {
560        if child.kind() == "function_definition" || child.kind() == "declaration" {
561            count += 1;
562        }
563    }
564    count
565}
566
567fn extract_include(node: Node, src: &[u8]) -> Option<ImportInfo> {
568    let path = node.child_by_field_name("path")?;
569    let text = node_text(path, src)
570        .trim_matches(|c| c == '"' || c == '<' || c == '>')
571        .to_string();
572    Some(ImportInfo {
573        source: text,
574        line: node.start_position().row + 1,
575        col: node.start_position().column,
576        ..Default::default()
577    })
578}
579
580fn count_complexity(node: Node) -> usize {
581    let mut c = 1usize;
582    let mut cursor = node.walk();
583    visit_all(node, &mut cursor, &mut |n| match n.kind() {
584        "if_statement"
585        | "for_statement"
586        | "while_statement"
587        | "do_statement"
588        | "case_statement"
589        | "catch_clause"
590        | "conditional_expression" => c += 1,
591        "binary_expression" => {
592            if let Some(op) = n.child_by_field_name("operator") {
593                let kind = op.kind();
594                if kind == "&&" || kind == "||" {
595                    c += 1;
596                }
597            }
598        }
599        _ => {}
600    });
601    c
602}
603
604fn max_chain_depth(node: Node) -> usize {
605    let mut max = 0;
606    let mut cursor = node.walk();
607    visit_all(node, &mut cursor, &mut |n| {
608        if n.kind() == "field_expression" {
609            let d = chain_len(n);
610            if d > max {
611                max = d;
612            }
613        }
614    });
615    max
616}
617
618fn chain_len(node: Node) -> usize {
619    let mut depth = 0;
620    let mut current = node;
621    while current.kind() == "field_expression" || current.kind() == "call_expression" {
622        if current.kind() == "field_expression" {
623            depth += 1;
624        }
625        match current.child(0) {
626            Some(c) => current = c,
627            None => break,
628        }
629    }
630    depth
631}
632
633fn collect_c_arm_values(body: Node, src: &[u8]) -> Vec<cha_core::ArmValue> {
634    let mut out = Vec::new();
635    crate::switch_arms::walk_arms(body, src, &mut out, &|n| n.kind() == "case_statement");
636    out
637}
638
639fn count_case_labels(node: Node) -> usize {
640    let mut count = 0;
641    let mut cursor = node.walk();
642    visit_all(node, &mut cursor, &mut |n| {
643        if n.kind() == "case_statement" {
644            count += 1;
645        }
646    });
647    count
648}
649
650fn cognitive_complexity_c(node: tree_sitter::Node) -> usize {
651    let mut score = 0;
652    cc_walk_c(node, 0, &mut score);
653    score
654}
655
656fn cc_walk_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
657    match node.kind() {
658        "if_statement" => {
659            *score += 1 + nesting;
660            cc_children_c(node, nesting + 1, score);
661            return;
662        }
663        "for_statement" | "while_statement" | "do_statement" => {
664            *score += 1 + nesting;
665            cc_children_c(node, nesting + 1, score);
666            return;
667        }
668        "switch_statement" => {
669            *score += 1 + nesting;
670            cc_children_c(node, nesting + 1, score);
671            return;
672        }
673        "else_clause" => {
674            *score += 1;
675        }
676        "binary_expression" => {
677            if let Some(op) = node.child_by_field_name("operator")
678                && (op.kind() == "&&" || op.kind() == "||")
679            {
680                *score += 1;
681            }
682        }
683        "catch_clause" => {
684            *score += 1 + nesting;
685            cc_children_c(node, nesting + 1, score);
686            return;
687        }
688        _ => {}
689    }
690    cc_children_c(node, nesting, score);
691}
692
693fn cc_children_c(node: tree_sitter::Node, nesting: usize, score: &mut usize) {
694    let mut cursor = node.walk();
695    for child in node.children(&mut cursor) {
696        cc_walk_c(child, nesting, score);
697    }
698}
699
700fn collect_external_refs_c(body: Node, src: &[u8]) -> Vec<String> {
701    let mut refs = Vec::new();
702    let mut cursor = body.walk();
703    visit_all(body, &mut cursor, &mut |n| {
704        if n.kind() == "field_expression"
705            && let Some(obj) = n.child(0)
706            && obj.kind() == "identifier"
707        {
708            let name = node_text(obj, src).to_string();
709            if !refs.contains(&name) {
710                refs.push(name);
711            }
712        }
713    });
714    refs
715}
716
717fn check_delegating_c(body: Node, src: &[u8]) -> bool {
718    let mut cursor = body.walk();
719    let stmts: Vec<Node> = body
720        .children(&mut cursor)
721        .filter(|n| n.kind() != "{" && n.kind() != "}" && !n.kind().contains("comment"))
722        .collect();
723    if stmts.len() != 1 {
724        return false;
725    }
726    let stmt = stmts[0];
727    let call = match stmt.kind() {
728        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
729        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
730        _ => None,
731    };
732    call.and_then(|c| c.child(0))
733        .is_some_and(|f| node_text(f, src).contains('.') || node_text(f, src).contains("->"))
734}
735
736fn collect_field_refs_c(body: Node, src: &[u8]) -> Vec<String> {
737    let mut refs = Vec::new();
738    let mut cursor = body.walk();
739    visit_all(body, &mut cursor, &mut |n| {
740        if n.kind() == "field_expression"
741            && let Some(field) = n.child_by_field_name("field")
742        {
743            let name = node_text(field, src).to_string();
744            if !refs.contains(&name) {
745                refs.push(name);
746            }
747        }
748    });
749    refs
750}
751
752fn collect_null_checks_c(body: Node, src: &[u8]) -> Vec<String> {
753    let mut fields = Vec::new();
754    let mut cursor = body.walk();
755    visit_all(body, &mut cursor, &mut |n| {
756        if n.kind() == "binary_expression" {
757            let text = node_text(n, src);
758            if (text.contains("NULL") || text.contains("nullptr"))
759                && let Some(left) = n.child(0)
760            {
761                let name = node_text(left, src).to_string();
762                if !fields.contains(&name) {
763                    fields.push(name);
764                }
765            }
766        }
767    });
768    fields
769}
770
771fn extract_switch_target_c(body: Node, src: &[u8]) -> Option<String> {
772    let mut cursor = body.walk();
773    let mut target = None;
774    visit_all(body, &mut cursor, &mut |n| {
775        if n.kind() == "switch_statement"
776            && target.is_none()
777            && let Some(cond) = n.child_by_field_name("condition")
778        {
779            target = Some(node_text(cond, src).trim_matches(['(', ')']).to_string());
780        }
781    });
782    target
783}
784
785fn collect_calls_c(body: Node, src: &[u8]) -> Vec<String> {
786    let mut calls = Vec::new();
787    let mut cursor = body.walk();
788    visit_all(body, &mut cursor, &mut |n| {
789        if n.kind() == "call_expression"
790            && let Some(func) = n.child(0)
791        {
792            let name = node_text(func, src).to_string();
793            if !calls.contains(&name) {
794                calls.push(name);
795            }
796        }
797    });
798    calls
799}
800
801fn count_comment_lines(node: Node, src: &[u8]) -> usize {
802    let mut count = 0;
803    let mut cursor = node.walk();
804    visit_all(node, &mut cursor, &mut |n| {
805        if n.kind() == "comment" {
806            count += node_text(n, src).lines().count();
807        }
808    });
809    count
810}
811
812fn hash_ast(node: Node) -> u64 {
813    let mut hasher = DefaultHasher::new();
814    hash_node(node, &mut hasher);
815    hasher.finish()
816}
817
818fn hash_node(node: Node, hasher: &mut DefaultHasher) {
819    node.kind().hash(hasher);
820    let mut cursor = node.walk();
821    for child in node.children(&mut cursor) {
822        hash_node(child, hasher);
823    }
824}
825
826fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
827    node.utf8_text(src).unwrap_or("")
828}
829
830fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
831    let mut comments = Vec::new();
832    let mut cursor = root.walk();
833    visit_all(root, &mut cursor, &mut |n| {
834        if n.kind().contains("comment") {
835            comments.push(cha_core::CommentInfo {
836                text: node_text(n, src).to_string(),
837                line: n.start_position().row + 1,
838            });
839        }
840    });
841    comments
842}
843
844fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
845    f(node);
846    if cursor.goto_first_child() {
847        loop {
848            let child_node = cursor.node();
849            let mut child_cursor = child_node.walk();
850            visit_all(child_node, &mut child_cursor, f);
851            if !cursor.goto_next_sibling() {
852                break;
853            }
854        }
855        cursor.goto_parent();
856    }
857}