Skip to main content

cha_parser/
golang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
9pub struct GolangParser;
10
11impl LanguageParser for GolangParser {
12    fn language_name(&self) -> &str {
13        "go"
14    }
15
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        let mut parser = Parser::new();
18        parser.set_language(&tree_sitter_go::LANGUAGE.into()).ok()?;
19        let tree = parser.parse(&file.content, None)?;
20        let root = tree.root_node();
21        let src = file.content.as_bytes();
22
23        let mut functions = Vec::new();
24        let mut classes = Vec::new();
25        let mut imports = Vec::new();
26        let mut type_aliases = Vec::new();
27
28        let imports_map = crate::golang_imports::build(root, src, &file.path);
29        collect_top_level(
30            root,
31            src,
32            &imports_map,
33            &mut functions,
34            &mut classes,
35            &mut imports,
36            &mut type_aliases,
37        );
38
39        Some(SourceModel {
40            language: "go".into(),
41            total_lines: file.line_count(),
42            functions,
43            classes,
44            imports,
45            comments: collect_comments(root, src),
46            type_aliases,
47        })
48    }
49}
50
51fn collect_top_level(
52    root: Node,
53    src: &[u8],
54    imports_map: &crate::type_ref::ImportsMap,
55    functions: &mut Vec<FunctionInfo>,
56    classes: &mut Vec<ClassInfo>,
57    imports: &mut Vec<ImportInfo>,
58    type_aliases: &mut Vec<(String, String)>,
59) {
60    let mut cursor = root.walk();
61    for child in root.children(&mut cursor) {
62        match child.kind() {
63            "function_declaration" | "method_declaration" => {
64                if let Some(f) = extract_function(child, src, imports_map) {
65                    functions.push(f);
66                }
67            }
68            "type_declaration" => extract_type_decl(child, src, classes, type_aliases),
69            "import_declaration" => collect_imports(child, src, imports),
70            _ => {}
71        }
72    }
73}
74
75fn extract_function(
76    node: Node,
77    src: &[u8],
78    imports_map: &crate::type_ref::ImportsMap,
79) -> Option<FunctionInfo> {
80    let name_node = node.child_by_field_name("name")?;
81    let name = node_text(name_node, src).to_string();
82    let name_col = name_node.start_position().column;
83    let name_end_col = name_node.end_position().column;
84    let start_line = node.start_position().row + 1;
85    let end_line = node.end_position().row + 1;
86    let body = node.child_by_field_name("body");
87    let params = node.child_by_field_name("parameters");
88    let (param_count, param_types, param_names) = params
89        .map(|p| extract_params(p, src, imports_map))
90        .unwrap_or((0, vec![], vec![]));
91    let is_exported = name.starts_with(|c: char| c.is_uppercase());
92
93    Some(FunctionInfo {
94        name,
95        start_line,
96        end_line,
97        name_col,
98        name_end_col,
99        line_count: end_line - start_line + 1,
100        complexity: count_complexity(node),
101        body_hash: body.map(hash_ast),
102        is_exported,
103        parameter_count: param_count,
104        parameter_types: param_types,
105        parameter_names: param_names,
106        chain_depth: body.map(max_chain_depth).unwrap_or(0),
107        switch_arms: body.map(count_case_clauses).unwrap_or(0),
108        switch_arm_values: body
109            .map(|b| collect_go_arm_values(b, src))
110            .unwrap_or_default(),
111        external_refs: body
112            .map(|b| collect_external_refs(b, src))
113            .unwrap_or_default(),
114        is_delegating: body.map(|b| check_delegating(b, src)).unwrap_or(false),
115        comment_lines: count_comment_lines(node, src),
116        referenced_fields: body
117            .map(|b| collect_field_refs_go(b, src))
118            .unwrap_or_default(),
119        null_check_fields: body.map(|b| collect_nil_checks(b, src)).unwrap_or_default(),
120        switch_dispatch_target: body.and_then(|b| extract_switch_target_go(b, src)),
121        optional_param_count: 0,
122        called_functions: collect_calls(body, src),
123        cognitive_complexity: body.map(|b| cognitive_complexity_go(b)).unwrap_or(0),
124        return_type: node
125            .child_by_field_name("result")
126            .map(|rt| crate::type_ref::resolve(node_text(rt, src), imports_map)),
127    })
128}
129
130fn extract_type_decl(
131    node: Node,
132    src: &[u8],
133    classes: &mut Vec<ClassInfo>,
134    type_aliases: &mut Vec<(String, String)>,
135) {
136    // Go distinguishes `type X = Y` (alias, `type_alias` node) from
137    // `type X Y` (defined type, `type_spec` node). Only the former is a
138    // transparent alias worth recording for TypeRef origin resolution.
139    let mut cursor = node.walk();
140    for child in node.children(&mut cursor) {
141        match child.kind() {
142            "type_spec" => {
143                if let Some(c) = extract_struct(child, src) {
144                    classes.push(c);
145                }
146            }
147            "type_alias" => {
148                if let Some(pair) = crate::type_aliases::go(child, src) {
149                    type_aliases.push(pair);
150                }
151            }
152            _ => {}
153        }
154    }
155}
156
157fn extract_struct(node: Node, src: &[u8]) -> Option<ClassInfo> {
158    let name_node = node.child_by_field_name("name")?;
159    let name = node_text(name_node, src).to_string();
160    let name_col = name_node.start_position().column;
161    let name_end_col = name_node.end_position().column;
162    let type_node = node.child_by_field_name("type")?;
163    if type_node.kind() != "struct_type" && type_node.kind() != "interface_type" {
164        return None;
165    }
166    let is_interface = type_node.kind() == "interface_type";
167    let start_line = node.start_position().row + 1;
168    let end_line = node.end_position().row + 1;
169    let field_count = count_struct_fields(type_node);
170    let is_exported = name.starts_with(|c: char| c.is_uppercase());
171
172    Some(ClassInfo {
173        name,
174        start_line,
175        end_line,
176        name_col,
177        name_end_col,
178        line_count: end_line - start_line + 1,
179        method_count: 0,
180        is_exported,
181        delegating_method_count: 0,
182        field_count,
183        field_names: Vec::new(),
184        field_types: Vec::new(),
185        has_behavior: false,
186        is_interface,
187        parent_name: None,
188        override_count: 0,
189        self_call_count: 0,
190        has_listener_field: false,
191        has_notify_method: false,
192    })
193}
194
195fn count_struct_fields(node: Node) -> usize {
196    let mut count = 0;
197    let mut cursor = node.walk();
198    visit_all(node, &mut cursor, &mut |n| {
199        if n.kind() == "field_declaration" {
200            count += 1;
201        }
202    });
203    count
204}
205
206fn collect_imports(node: Node, src: &[u8], imports: &mut Vec<ImportInfo>) {
207    let mut cursor = node.walk();
208    visit_all(node, &mut cursor, &mut |n| {
209        if n.kind() == "import_spec" {
210            let line = n.start_position().row + 1;
211            let col = n.start_position().column;
212            let path_node = n.child_by_field_name("path").unwrap_or(n);
213            let text = node_text(path_node, src).trim_matches('"').to_string();
214            if !text.is_empty() {
215                imports.push(ImportInfo {
216                    source: text,
217                    line,
218                    col,
219                    ..Default::default()
220                });
221            }
222        }
223    });
224}
225
226fn extract_params(
227    params: Node,
228    src: &[u8],
229    imports_map: &crate::type_ref::ImportsMap,
230) -> (usize, Vec<cha_core::TypeRef>, Vec<String>) {
231    let mut count = 0;
232    let mut types = Vec::new();
233    let mut names_out = Vec::new();
234    let mut cursor = params.walk();
235    for child in params.children(&mut cursor) {
236        if child.kind() == "parameter_declaration" {
237            let raw = child
238                .child_by_field_name("type")
239                .map(|t| node_text(t, src).to_string())
240                .unwrap_or_else(|| "any".into());
241            // Each parameter_declaration can introduce multiple names
242            // sharing one type (`a, b int`). Expand them out to one
243            // TypeRef + name pair each.
244            let mut inner = child.walk();
245            let idents: Vec<String> = child
246                .children(&mut inner)
247                .filter(|c| c.kind() == "identifier")
248                .map(|c| node_text(c, src).to_string())
249                .collect();
250            if idents.is_empty() {
251                count += 1;
252                types.push(resolve_go_type(&raw, imports_map));
253                names_out.push(String::new());
254            } else {
255                for n in idents {
256                    count += 1;
257                    types.push(resolve_go_type(&raw, imports_map));
258                    names_out.push(n);
259                }
260            }
261        }
262    }
263    (count, types, names_out)
264}
265
266/// Go types are `pkg.TypeName` or `*pkg.TypeName`; the importable name in
267/// ImportsMap is the package alias. Split on `.`, look up the first segment.
268fn resolve_go_type(raw: &str, imports_map: &crate::type_ref::ImportsMap) -> cha_core::TypeRef {
269    // Strip decorations to get the inner `pkg.Type` or `Type`.
270    let inner = raw.trim_start_matches('*').trim_start_matches('[').trim();
271    let inner = inner.trim_start_matches(']').trim();
272    let mut parts = inner.splitn(2, '.');
273    let first = parts.next().unwrap_or(inner);
274    let second = parts.next();
275    let (short_name, origin) = if let Some(type_part) = second {
276        let origin = imports_map
277            .get(first)
278            .cloned()
279            .unwrap_or(cha_core::TypeOrigin::Unknown);
280        (type_part.to_string(), origin)
281    } else {
282        // No `.` → builtin type (string, int, bool, etc.) or locally-declared
283        // type. Treat builtin primitives accordingly; everything else → Local.
284        let origin = if is_go_builtin(inner) {
285            cha_core::TypeOrigin::Primitive
286        } else {
287            cha_core::TypeOrigin::Local
288        };
289        (inner.to_string(), origin)
290    };
291    cha_core::TypeRef {
292        name: short_name,
293        raw: raw.to_string(),
294        origin,
295    }
296}
297
/// Returns `true` when `name` is exactly one of the builtin Go type names
/// listed below (including `any` and the literal spelling `interface{}`).
fn is_go_builtin(name: &str) -> bool {
    const BUILTIN_TYPES: &[&str] = &[
        "bool",
        "byte",
        "complex64",
        "complex128",
        "error",
        "float32",
        "float64",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "rune",
        "string",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
        "any",
        "interface{}",
    ];
    BUILTIN_TYPES.contains(&name)
}
325
326fn count_complexity(node: Node) -> usize {
327    let mut c = 1usize;
328    let mut cursor = node.walk();
329    visit_all(node, &mut cursor, &mut |n| match n.kind() {
330        "if_statement" | "for_statement" | "expression_case" | "default_case" | "type_case"
331        | "select_statement" | "go_statement" => c += 1,
332        "binary_expression" => {
333            if let Some(op) = n.child_by_field_name("operator") {
334                let kind = op.kind();
335                if kind == "&&" || kind == "||" {
336                    c += 1;
337                }
338            }
339        }
340        _ => {}
341    });
342    c
343}
344
345fn max_chain_depth(node: Node) -> usize {
346    let mut max = 0;
347    let mut cursor = node.walk();
348    visit_all(node, &mut cursor, &mut |n| {
349        if n.kind() == "selector_expression" {
350            let d = chain_len(n);
351            if d > max {
352                max = d;
353            }
354        }
355    });
356    max
357}
358
359fn chain_len(node: Node) -> usize {
360    let mut depth = 0;
361    let mut current = node;
362    while current.kind() == "selector_expression" || current.kind() == "call_expression" {
363        if current.kind() == "selector_expression" {
364            depth += 1;
365        }
366        match current.child(0) {
367            Some(c) => current = c,
368            None => break,
369        }
370    }
371    depth
372}
373
374fn collect_go_arm_values(body: Node, src: &[u8]) -> Vec<cha_core::ArmValue> {
375    let mut out = Vec::new();
376    crate::switch_arms::walk_arms(body, src, &mut out, &|n| n.kind() == "expression_case");
377    out
378}
379
380fn count_case_clauses(node: Node) -> usize {
381    let mut count = 0;
382    let mut cursor = node.walk();
383    visit_all(node, &mut cursor, &mut |n| {
384        if n.kind() == "expression_case" || n.kind() == "default_case" || n.kind() == "type_case" {
385            count += 1;
386        }
387    });
388    count
389}
390
391fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
392    let mut refs = Vec::new();
393    let mut cursor = node.walk();
394    visit_all(node, &mut cursor, &mut |n| {
395        if n.kind() == "selector_expression"
396            && let Some(obj) = n.child(0)
397            && obj.kind() == "identifier"
398        {
399            let text = node_text(obj, src).to_string();
400            if !refs.contains(&text) {
401                refs.push(text);
402            }
403        }
404    });
405    refs
406}
407
408fn check_delegating(body: Node, src: &[u8]) -> bool {
409    let mut cursor = body.walk();
410    let stmts: Vec<Node> = body
411        .children(&mut cursor)
412        .filter(|n| n.kind() != "{" && n.kind() != "}" && n.kind() != "comment")
413        .collect();
414    if stmts.len() != 1 {
415        return false;
416    }
417    let stmt = stmts[0];
418    let call = match stmt.kind() {
419        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
420        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
421        _ => None,
422    };
423    call.and_then(|c| c.child(0))
424        .is_some_and(|f| node_text(f, src).contains('.'))
425}
426
427fn count_comment_lines(node: Node, src: &[u8]) -> usize {
428    let mut count = 0;
429    let mut cursor = node.walk();
430    visit_all(node, &mut cursor, &mut |n| {
431        if n.kind() == "comment" {
432            count += node_text(n, src).lines().count();
433        }
434    });
435    count
436}
437
438fn collect_nil_checks(body: Node, src: &[u8]) -> Vec<String> {
439    let mut fields = Vec::new();
440    let mut cursor = body.walk();
441    visit_all(body, &mut cursor, &mut |n| {
442        if n.kind() != "binary_expression" {
443            return;
444        }
445        let text = node_text(n, src);
446        if !text.contains("nil") {
447            return;
448        }
449        if let Some(left) = n.child(0) {
450            let name = node_text(left, src).to_string();
451            if !fields.contains(&name) {
452                fields.push(name);
453            }
454        }
455    });
456    fields
457}
458
459fn hash_ast(node: Node) -> u64 {
460    let mut hasher = DefaultHasher::new();
461    hash_node(node, &mut hasher);
462    hasher.finish()
463}
464
465fn hash_node(node: Node, hasher: &mut DefaultHasher) {
466    node.kind().hash(hasher);
467    let mut cursor = node.walk();
468    for child in node.children(&mut cursor) {
469        hash_node(child, hasher);
470    }
471}
472
473fn cognitive_complexity_go(node: Node) -> usize {
474    let mut score = 0;
475    cc_walk(node, 0, &mut score);
476    score
477}
478
479fn cc_walk(node: Node, nesting: usize, score: &mut usize) {
480    match node.kind() {
481        "if_statement" => {
482            *score += 1 + nesting;
483            cc_children(node, nesting + 1, score);
484            return;
485        }
486        "for_statement" => {
487            *score += 1 + nesting;
488            cc_children(node, nesting + 1, score);
489            return;
490        }
491        "expression_switch_statement" | "type_switch_statement" | "select_statement" => {
492            *score += 1 + nesting;
493            cc_children(node, nesting + 1, score);
494            return;
495        }
496        "else_clause" => {
497            *score += 1; // no nesting increment for else
498        }
499        "binary_expression" => {
500            if let Some(op) = node.child_by_field_name("operator")
501                && (op.kind() == "&&" || op.kind() == "||")
502            {
503                *score += 1;
504            }
505        }
506        _ => {}
507    }
508    cc_children(node, nesting, score);
509}
510
511fn cc_children(node: Node, nesting: usize, score: &mut usize) {
512    let mut cursor = node.walk();
513    for child in node.children(&mut cursor) {
514        cc_walk(child, nesting, score);
515    }
516}
517
518fn collect_field_refs_go(body: Node, src: &[u8]) -> Vec<String> {
519    let mut refs = Vec::new();
520    let mut cursor = body.walk();
521    visit_all(body, &mut cursor, &mut |n| {
522        if n.kind() == "selector_expression"
523            && let Some(field) = n.child_by_field_name("field")
524        {
525            let name = node_text(field, src).to_string();
526            if !refs.contains(&name) {
527                refs.push(name);
528            }
529        }
530    });
531    refs
532}
533
534fn extract_switch_target_go(body: Node, src: &[u8]) -> Option<String> {
535    let mut target = None;
536    let mut cursor = body.walk();
537    visit_all(body, &mut cursor, &mut |n| {
538        if (n.kind() == "expression_switch_statement" || n.kind() == "type_switch_statement")
539            && target.is_none()
540            && let Some(val) = n.child_by_field_name("value")
541        {
542            target = Some(node_text(val, src).to_string());
543        }
544    });
545    target
546}
547
548fn collect_calls(body: Option<Node>, src: &[u8]) -> Vec<String> {
549    let Some(body) = body else { return Vec::new() };
550    let mut calls = Vec::new();
551    let mut cursor = body.walk();
552    visit_all(body, &mut cursor, &mut |n| {
553        if n.kind() == "call_expression"
554            && let Some(func) = n.child(0)
555        {
556            let name = node_text(func, src).to_string();
557            if !calls.contains(&name) {
558                calls.push(name);
559            }
560        }
561    });
562    calls
563}
564
565fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
566    node.utf8_text(src).unwrap_or("")
567}
568
569fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
570    let mut comments = Vec::new();
571    let mut cursor = root.walk();
572    visit_all(root, &mut cursor, &mut |n| {
573        if n.kind().contains("comment") {
574            comments.push(cha_core::CommentInfo {
575                text: node_text(n, src).to_string(),
576                line: n.start_position().row + 1,
577            });
578        }
579    });
580    comments
581}
582
583fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
584    f(node);
585    if cursor.goto_first_child() {
586        loop {
587            let child_node = cursor.node();
588            let mut child_cursor = child_node.walk();
589            visit_all(child_node, &mut child_cursor, f);
590            if !cursor.goto_next_sibling() {
591                break;
592            }
593        }
594        cursor.goto_parent();
595    }
596}