Skip to main content

cha_parser/
golang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless [`LanguageParser`] implementation for Go source files.
pub struct GolangParser;
10
11impl LanguageParser for GolangParser {
12    fn language_name(&self) -> &str {
13        "go"
14    }
15
16    fn ts_language(&self) -> tree_sitter::Language {
17        tree_sitter_go::LANGUAGE.into()
18    }
19
20    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
21        let mut parser = Parser::new();
22        parser.set_language(&tree_sitter_go::LANGUAGE.into()).ok()?;
23        let tree = parser.parse(&file.content, None)?;
24        let root = tree.root_node();
25        let src = file.content.as_bytes();
26
27        let mut functions = Vec::new();
28        let mut classes = Vec::new();
29        let mut imports = Vec::new();
30        let mut type_aliases = Vec::new();
31
32        let imports_map = crate::golang_imports::build(root, src, &file.path);
33        collect_top_level(
34            root,
35            src,
36            &imports_map,
37            &mut functions,
38            &mut classes,
39            &mut imports,
40            &mut type_aliases,
41        );
42
43        Some(SourceModel {
44            language: "go".into(),
45            total_lines: file.line_count(),
46            functions,
47            classes,
48            imports,
49            comments: collect_comments(root, src),
50            type_aliases,
51        })
52    }
53}
54
55fn collect_top_level(
56    root: Node,
57    src: &[u8],
58    imports_map: &crate::type_ref::ImportsMap,
59    functions: &mut Vec<FunctionInfo>,
60    classes: &mut Vec<ClassInfo>,
61    imports: &mut Vec<ImportInfo>,
62    type_aliases: &mut Vec<(String, String)>,
63) {
64    let mut cursor = root.walk();
65    for child in root.children(&mut cursor) {
66        match child.kind() {
67            "function_declaration" | "method_declaration" => {
68                if let Some(f) = extract_function(child, src, imports_map) {
69                    functions.push(f);
70                }
71            }
72            "type_declaration" => extract_type_decl(child, src, classes, type_aliases),
73            "import_declaration" => collect_imports(child, src, imports),
74            _ => {}
75        }
76    }
77}
78
79fn extract_function(
80    node: Node,
81    src: &[u8],
82    imports_map: &crate::type_ref::ImportsMap,
83) -> Option<FunctionInfo> {
84    let name_node = node.child_by_field_name("name")?;
85    let name = node_text(name_node, src).to_string();
86    let name_col = name_node.start_position().column;
87    let name_end_col = name_node.end_position().column;
88    let start_line = node.start_position().row + 1;
89    let end_line = node.end_position().row + 1;
90    let body = node.child_by_field_name("body");
91    let params = node.child_by_field_name("parameters");
92    let (param_count, param_types, param_names) = params
93        .map(|p| extract_params(p, src, imports_map))
94        .unwrap_or((0, vec![], vec![]));
95    let is_exported = name.starts_with(|c: char| c.is_uppercase());
96
97    Some(FunctionInfo {
98        name,
99        start_line,
100        end_line,
101        name_col,
102        name_end_col,
103        line_count: end_line - start_line + 1,
104        complexity: count_complexity(node),
105        body_hash: body.map(hash_ast),
106        is_exported,
107        parameter_count: param_count,
108        parameter_types: param_types,
109        parameter_names: param_names,
110        chain_depth: body.map(max_chain_depth).unwrap_or(0),
111        switch_arms: body.map(count_case_clauses).unwrap_or(0),
112        switch_arm_values: body
113            .map(|b| collect_go_arm_values(b, src))
114            .unwrap_or_default(),
115        external_refs: body
116            .map(|b| collect_external_refs(b, src))
117            .unwrap_or_default(),
118        is_delegating: body.map(|b| check_delegating(b, src)).unwrap_or(false),
119        comment_lines: count_comment_lines(node, src),
120        referenced_fields: body
121            .map(|b| collect_field_refs_go(b, src))
122            .unwrap_or_default(),
123        null_check_fields: body.map(|b| collect_nil_checks(b, src)).unwrap_or_default(),
124        switch_dispatch_target: body.and_then(|b| extract_switch_target_go(b, src)),
125        optional_param_count: 0,
126        called_functions: collect_calls(body, src),
127        cognitive_complexity: body.map(|b| cognitive_complexity_go(b)).unwrap_or(0),
128        return_type: node
129            .child_by_field_name("result")
130            .map(|rt| crate::type_ref::resolve(node_text(rt, src), imports_map)),
131    })
132}
133
134fn extract_type_decl(
135    node: Node,
136    src: &[u8],
137    classes: &mut Vec<ClassInfo>,
138    type_aliases: &mut Vec<(String, String)>,
139) {
140    // Go distinguishes `type X = Y` (alias, `type_alias` node) from
141    // `type X Y` (defined type, `type_spec` node). Only the former is a
142    // transparent alias worth recording for TypeRef origin resolution.
143    let mut cursor = node.walk();
144    for child in node.children(&mut cursor) {
145        match child.kind() {
146            "type_spec" => {
147                if let Some(c) = extract_struct(child, src) {
148                    classes.push(c);
149                }
150            }
151            "type_alias" => {
152                if let Some(pair) = crate::type_aliases::go(child, src) {
153                    type_aliases.push(pair);
154                }
155            }
156            _ => {}
157        }
158    }
159}
160
161fn extract_struct(node: Node, src: &[u8]) -> Option<ClassInfo> {
162    let name_node = node.child_by_field_name("name")?;
163    let name = node_text(name_node, src).to_string();
164    let name_col = name_node.start_position().column;
165    let name_end_col = name_node.end_position().column;
166    let type_node = node.child_by_field_name("type")?;
167    if type_node.kind() != "struct_type" && type_node.kind() != "interface_type" {
168        return None;
169    }
170    let is_interface = type_node.kind() == "interface_type";
171    let start_line = node.start_position().row + 1;
172    let end_line = node.end_position().row + 1;
173    let field_count = count_struct_fields(type_node);
174    let is_exported = name.starts_with(|c: char| c.is_uppercase());
175
176    Some(ClassInfo {
177        name,
178        start_line,
179        end_line,
180        name_col,
181        name_end_col,
182        line_count: end_line - start_line + 1,
183        method_count: 0,
184        is_exported,
185        delegating_method_count: 0,
186        field_count,
187        field_names: Vec::new(),
188        field_types: Vec::new(),
189        has_behavior: false,
190        is_interface,
191        parent_name: None,
192        override_count: 0,
193        self_call_count: 0,
194        has_listener_field: false,
195        has_notify_method: false,
196    })
197}
198
199fn count_struct_fields(node: Node) -> usize {
200    let mut count = 0;
201    let mut cursor = node.walk();
202    visit_all(node, &mut cursor, &mut |n| {
203        if n.kind() == "field_declaration" {
204            count += 1;
205        }
206    });
207    count
208}
209
210fn collect_imports(node: Node, src: &[u8], imports: &mut Vec<ImportInfo>) {
211    let mut cursor = node.walk();
212    visit_all(node, &mut cursor, &mut |n| {
213        if n.kind() == "import_spec" {
214            let line = n.start_position().row + 1;
215            let col = n.start_position().column;
216            let path_node = n.child_by_field_name("path").unwrap_or(n);
217            let text = node_text(path_node, src).trim_matches('"').to_string();
218            if !text.is_empty() {
219                imports.push(ImportInfo {
220                    source: text,
221                    line,
222                    col,
223                    ..Default::default()
224                });
225            }
226        }
227    });
228}
229
230fn extract_params(
231    params: Node,
232    src: &[u8],
233    imports_map: &crate::type_ref::ImportsMap,
234) -> (usize, Vec<cha_core::TypeRef>, Vec<String>) {
235    let mut count = 0;
236    let mut types = Vec::new();
237    let mut names_out = Vec::new();
238    let mut cursor = params.walk();
239    for child in params.children(&mut cursor) {
240        if child.kind() == "parameter_declaration" {
241            let raw = child
242                .child_by_field_name("type")
243                .map(|t| node_text(t, src).to_string())
244                .unwrap_or_else(|| "any".into());
245            // Each parameter_declaration can introduce multiple names
246            // sharing one type (`a, b int`). Expand them out to one
247            // TypeRef + name pair each.
248            let mut inner = child.walk();
249            let idents: Vec<String> = child
250                .children(&mut inner)
251                .filter(|c| c.kind() == "identifier")
252                .map(|c| node_text(c, src).to_string())
253                .collect();
254            if idents.is_empty() {
255                count += 1;
256                types.push(resolve_go_type(&raw, imports_map));
257                names_out.push(String::new());
258            } else {
259                for n in idents {
260                    count += 1;
261                    types.push(resolve_go_type(&raw, imports_map));
262                    names_out.push(n);
263                }
264            }
265        }
266    }
267    (count, types, names_out)
268}
269
270/// Go types are `pkg.TypeName` or `*pkg.TypeName`; the importable name in
271/// ImportsMap is the package alias. Split on `.`, look up the first segment.
272fn resolve_go_type(raw: &str, imports_map: &crate::type_ref::ImportsMap) -> cha_core::TypeRef {
273    // Strip decorations to get the inner `pkg.Type` or `Type`.
274    let inner = raw.trim_start_matches('*').trim_start_matches('[').trim();
275    let inner = inner.trim_start_matches(']').trim();
276    let mut parts = inner.splitn(2, '.');
277    let first = parts.next().unwrap_or(inner);
278    let second = parts.next();
279    let (short_name, origin) = if let Some(type_part) = second {
280        let origin = imports_map
281            .get(first)
282            .cloned()
283            .unwrap_or(cha_core::TypeOrigin::Unknown);
284        (type_part.to_string(), origin)
285    } else {
286        // No `.` → builtin type (string, int, bool, etc.) or locally-declared
287        // type. Treat builtin primitives accordingly; everything else → Local.
288        let origin = if is_go_builtin(inner) {
289            cha_core::TypeOrigin::Primitive
290        } else {
291            cha_core::TypeOrigin::Local
292        };
293        (inner.to_string(), origin)
294    };
295    cha_core::TypeRef {
296        name: short_name,
297        raw: raw.to_string(),
298        origin,
299    }
300}
301
/// Returns `true` when `name` is exactly one of the Go type names listed
/// below (the match is case-sensitive and whitespace-sensitive).
fn is_go_builtin(name: &str) -> bool {
    const BUILTIN_TYPES: &[&str] = &[
        "bool",
        "byte",
        "complex64",
        "complex128",
        "error",
        "float32",
        "float64",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "rune",
        "string",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
        "any",
        "interface{}",
    ];
    BUILTIN_TYPES.iter().any(|builtin| *builtin == name)
}
329
330fn count_complexity(node: Node) -> usize {
331    let mut c = 1usize;
332    let mut cursor = node.walk();
333    visit_all(node, &mut cursor, &mut |n| match n.kind() {
334        "if_statement" | "for_statement" | "expression_case" | "default_case" | "type_case"
335        | "select_statement" | "go_statement" => c += 1,
336        "binary_expression" => {
337            if let Some(op) = n.child_by_field_name("operator") {
338                let kind = op.kind();
339                if kind == "&&" || kind == "||" {
340                    c += 1;
341                }
342            }
343        }
344        _ => {}
345    });
346    c
347}
348
349fn max_chain_depth(node: Node) -> usize {
350    let mut max = 0;
351    let mut cursor = node.walk();
352    visit_all(node, &mut cursor, &mut |n| {
353        if n.kind() == "selector_expression" {
354            let d = chain_len(n);
355            if d > max {
356                max = d;
357            }
358        }
359    });
360    max
361}
362
363fn chain_len(node: Node) -> usize {
364    let mut depth = 0;
365    let mut current = node;
366    while current.kind() == "selector_expression" || current.kind() == "call_expression" {
367        if current.kind() == "selector_expression" {
368            depth += 1;
369        }
370        match current.child(0) {
371            Some(c) => current = c,
372            None => break,
373        }
374    }
375    depth
376}
377
/// Gathers switch-arm values from `body` by delegating to
/// `crate::switch_arms::walk_arms`, with a predicate that selects
/// `expression_case` nodes.
fn collect_go_arm_values(body: Node, src: &[u8]) -> Vec<cha_core::ArmValue> {
    let mut out = Vec::new();
    // NOTE(review): how `walk_arms` turns a matching node into an `ArmValue`
    // is defined elsewhere — see `crate::switch_arms`.
    crate::switch_arms::walk_arms(body, src, &mut out, &|n| n.kind() == "expression_case");
    out
}
383
384fn count_case_clauses(node: Node) -> usize {
385    let mut count = 0;
386    let mut cursor = node.walk();
387    visit_all(node, &mut cursor, &mut |n| {
388        if n.kind() == "expression_case" || n.kind() == "default_case" || n.kind() == "type_case" {
389            count += 1;
390        }
391    });
392    count
393}
394
395fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
396    let mut refs = Vec::new();
397    let mut cursor = node.walk();
398    visit_all(node, &mut cursor, &mut |n| {
399        if n.kind() == "selector_expression"
400            && let Some(obj) = n.child(0)
401            && obj.kind() == "identifier"
402        {
403            let text = node_text(obj, src).to_string();
404            if !refs.contains(&text) {
405                refs.push(text);
406            }
407        }
408    });
409    refs
410}
411
412fn check_delegating(body: Node, src: &[u8]) -> bool {
413    let mut cursor = body.walk();
414    let stmts: Vec<Node> = body
415        .children(&mut cursor)
416        .filter(|n| n.kind() != "{" && n.kind() != "}" && n.kind() != "comment")
417        .collect();
418    if stmts.len() != 1 {
419        return false;
420    }
421    let stmt = stmts[0];
422    let call = match stmt.kind() {
423        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
424        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
425        _ => None,
426    };
427    call.and_then(|c| c.child(0))
428        .is_some_and(|f| node_text(f, src).contains('.'))
429}
430
431fn count_comment_lines(node: Node, src: &[u8]) -> usize {
432    let mut count = 0;
433    let mut cursor = node.walk();
434    visit_all(node, &mut cursor, &mut |n| {
435        if n.kind() == "comment" {
436            count += node_text(n, src).lines().count();
437        }
438    });
439    count
440}
441
442fn collect_nil_checks(body: Node, src: &[u8]) -> Vec<String> {
443    let mut fields = Vec::new();
444    let mut cursor = body.walk();
445    visit_all(body, &mut cursor, &mut |n| {
446        if n.kind() != "binary_expression" {
447            return;
448        }
449        let text = node_text(n, src);
450        if !text.contains("nil") {
451            return;
452        }
453        if let Some(left) = n.child(0) {
454            let name = node_text(left, src).to_string();
455            if !fields.contains(&name) {
456                fields.push(name);
457            }
458        }
459    });
460    fields
461}
462
463fn hash_ast(node: Node) -> u64 {
464    let mut hasher = DefaultHasher::new();
465    hash_node(node, &mut hasher);
466    hasher.finish()
467}
468
469fn hash_node(node: Node, hasher: &mut DefaultHasher) {
470    node.kind().hash(hasher);
471    let mut cursor = node.walk();
472    for child in node.children(&mut cursor) {
473        hash_node(child, hasher);
474    }
475}
476
477fn cognitive_complexity_go(node: Node) -> usize {
478    let mut score = 0;
479    cc_walk(node, 0, &mut score);
480    score
481}
482
483fn cc_walk(node: Node, nesting: usize, score: &mut usize) {
484    match node.kind() {
485        "if_statement" => {
486            *score += 1 + nesting;
487            cc_children(node, nesting + 1, score);
488            return;
489        }
490        "for_statement" => {
491            *score += 1 + nesting;
492            cc_children(node, nesting + 1, score);
493            return;
494        }
495        "expression_switch_statement" | "type_switch_statement" | "select_statement" => {
496            *score += 1 + nesting;
497            cc_children(node, nesting + 1, score);
498            return;
499        }
500        "else_clause" => {
501            *score += 1; // no nesting increment for else
502        }
503        "binary_expression" => {
504            if let Some(op) = node.child_by_field_name("operator")
505                && (op.kind() == "&&" || op.kind() == "||")
506            {
507                *score += 1;
508            }
509        }
510        _ => {}
511    }
512    cc_children(node, nesting, score);
513}
514
515fn cc_children(node: Node, nesting: usize, score: &mut usize) {
516    let mut cursor = node.walk();
517    for child in node.children(&mut cursor) {
518        cc_walk(child, nesting, score);
519    }
520}
521
522fn collect_field_refs_go(body: Node, src: &[u8]) -> Vec<String> {
523    let mut refs = Vec::new();
524    let mut cursor = body.walk();
525    visit_all(body, &mut cursor, &mut |n| {
526        if n.kind() == "selector_expression"
527            && let Some(field) = n.child_by_field_name("field")
528        {
529            let name = node_text(field, src).to_string();
530            if !refs.contains(&name) {
531                refs.push(name);
532            }
533        }
534    });
535    refs
536}
537
538fn extract_switch_target_go(body: Node, src: &[u8]) -> Option<String> {
539    let mut target = None;
540    let mut cursor = body.walk();
541    visit_all(body, &mut cursor, &mut |n| {
542        if (n.kind() == "expression_switch_statement" || n.kind() == "type_switch_statement")
543            && target.is_none()
544            && let Some(val) = n.child_by_field_name("value")
545        {
546            target = Some(node_text(val, src).to_string());
547        }
548    });
549    target
550}
551
552fn collect_calls(body: Option<Node>, src: &[u8]) -> Vec<String> {
553    let Some(body) = body else { return Vec::new() };
554    let mut calls = Vec::new();
555    let mut cursor = body.walk();
556    visit_all(body, &mut cursor, &mut |n| {
557        if n.kind() == "call_expression"
558            && let Some(func) = n.child(0)
559        {
560            let name = node_text(func, src).to_string();
561            if !calls.contains(&name) {
562                calls.push(name);
563            }
564        }
565    });
566    calls
567}
568
569fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
570    node.utf8_text(src).unwrap_or("")
571}
572
573fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
574    let mut comments = Vec::new();
575    let mut cursor = root.walk();
576    visit_all(root, &mut cursor, &mut |n| {
577        if n.kind().contains("comment") {
578            comments.push(cha_core::CommentInfo {
579                text: node_text(n, src).to_string(),
580                line: n.start_position().row + 1,
581            });
582        }
583    });
584    comments
585}
586
587fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
588    f(node);
589    if cursor.goto_first_child() {
590        loop {
591            let child_node = cursor.node();
592            let mut child_cursor = child_node.walk();
593            visit_all(child_node, &mut child_cursor, f);
594            if !cursor.goto_next_sibling() {
595                break;
596            }
597        }
598        cursor.goto_parent();
599    }
600}