Skip to main content

cha_parser/
golang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
9pub struct GolangParser;
10
11impl LanguageParser for GolangParser {
12    fn language_name(&self) -> &str {
13        "go"
14    }
15
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        let mut parser = Parser::new();
18        parser.set_language(&tree_sitter_go::LANGUAGE.into()).ok()?;
19        let tree = parser.parse(&file.content, None)?;
20        let root = tree.root_node();
21        let src = file.content.as_bytes();
22
23        let mut functions = Vec::new();
24        let mut classes = Vec::new();
25        let mut imports = Vec::new();
26        let mut type_aliases = Vec::new();
27
28        let imports_map = crate::golang_imports::build(root, src, &file.path);
29        collect_top_level(
30            root,
31            src,
32            &imports_map,
33            &mut functions,
34            &mut classes,
35            &mut imports,
36            &mut type_aliases,
37        );
38
39        Some(SourceModel {
40            language: "go".into(),
41            total_lines: file.line_count(),
42            functions,
43            classes,
44            imports,
45            comments: collect_comments(root, src),
46            type_aliases,
47        })
48    }
49}
50
51fn collect_top_level(
52    root: Node,
53    src: &[u8],
54    imports_map: &crate::type_ref::ImportsMap,
55    functions: &mut Vec<FunctionInfo>,
56    classes: &mut Vec<ClassInfo>,
57    imports: &mut Vec<ImportInfo>,
58    type_aliases: &mut Vec<(String, String)>,
59) {
60    let mut cursor = root.walk();
61    for child in root.children(&mut cursor) {
62        match child.kind() {
63            "function_declaration" | "method_declaration" => {
64                if let Some(f) = extract_function(child, src, imports_map) {
65                    functions.push(f);
66                }
67            }
68            "type_declaration" => extract_type_decl(child, src, classes, type_aliases),
69            "import_declaration" => collect_imports(child, src, imports),
70            _ => {}
71        }
72    }
73}
74
75fn extract_function(
76    node: Node,
77    src: &[u8],
78    imports_map: &crate::type_ref::ImportsMap,
79) -> Option<FunctionInfo> {
80    let name_node = node.child_by_field_name("name")?;
81    let name = node_text(name_node, src).to_string();
82    let name_col = name_node.start_position().column;
83    let name_end_col = name_node.end_position().column;
84    let start_line = node.start_position().row + 1;
85    let end_line = node.end_position().row + 1;
86    let body = node.child_by_field_name("body");
87    let params = node.child_by_field_name("parameters");
88    let (param_count, param_types) = params
89        .map(|p| extract_params(p, src, imports_map))
90        .unwrap_or((0, vec![]));
91    let is_exported = name.starts_with(|c: char| c.is_uppercase());
92
93    Some(FunctionInfo {
94        name,
95        start_line,
96        end_line,
97        name_col,
98        name_end_col,
99        line_count: end_line - start_line + 1,
100        complexity: count_complexity(node),
101        body_hash: body.map(hash_ast),
102        is_exported,
103        parameter_count: param_count,
104        parameter_types: param_types,
105        chain_depth: body.map(max_chain_depth).unwrap_or(0),
106        switch_arms: body.map(count_case_clauses).unwrap_or(0),
107        external_refs: body
108            .map(|b| collect_external_refs(b, src))
109            .unwrap_or_default(),
110        is_delegating: body.map(|b| check_delegating(b, src)).unwrap_or(false),
111        comment_lines: count_comment_lines(node, src),
112        referenced_fields: body
113            .map(|b| collect_field_refs_go(b, src))
114            .unwrap_or_default(),
115        null_check_fields: body.map(|b| collect_nil_checks(b, src)).unwrap_or_default(),
116        switch_dispatch_target: body.and_then(|b| extract_switch_target_go(b, src)),
117        optional_param_count: 0,
118        called_functions: collect_calls(body, src),
119        cognitive_complexity: body.map(|b| cognitive_complexity_go(b)).unwrap_or(0),
120        return_type: node
121            .child_by_field_name("result")
122            .map(|rt| crate::type_ref::resolve(node_text(rt, src), imports_map)),
123    })
124}
125
126fn extract_type_decl(
127    node: Node,
128    src: &[u8],
129    classes: &mut Vec<ClassInfo>,
130    type_aliases: &mut Vec<(String, String)>,
131) {
132    // Go distinguishes `type X = Y` (alias, `type_alias` node) from
133    // `type X Y` (defined type, `type_spec` node). Only the former is a
134    // transparent alias worth recording for TypeRef origin resolution.
135    let mut cursor = node.walk();
136    for child in node.children(&mut cursor) {
137        match child.kind() {
138            "type_spec" => {
139                if let Some(c) = extract_struct(child, src) {
140                    classes.push(c);
141                }
142            }
143            "type_alias" => {
144                if let Some(pair) = crate::type_aliases::go(child, src) {
145                    type_aliases.push(pair);
146                }
147            }
148            _ => {}
149        }
150    }
151}
152
153fn extract_struct(node: Node, src: &[u8]) -> Option<ClassInfo> {
154    let name_node = node.child_by_field_name("name")?;
155    let name = node_text(name_node, src).to_string();
156    let name_col = name_node.start_position().column;
157    let name_end_col = name_node.end_position().column;
158    let type_node = node.child_by_field_name("type")?;
159    if type_node.kind() != "struct_type" && type_node.kind() != "interface_type" {
160        return None;
161    }
162    let is_interface = type_node.kind() == "interface_type";
163    let start_line = node.start_position().row + 1;
164    let end_line = node.end_position().row + 1;
165    let field_count = count_struct_fields(type_node);
166    let is_exported = name.starts_with(|c: char| c.is_uppercase());
167
168    Some(ClassInfo {
169        name,
170        start_line,
171        end_line,
172        name_col,
173        name_end_col,
174        line_count: end_line - start_line + 1,
175        method_count: 0,
176        is_exported,
177        delegating_method_count: 0,
178        field_count,
179        field_names: Vec::new(),
180        field_types: Vec::new(),
181        has_behavior: false,
182        is_interface,
183        parent_name: None,
184        override_count: 0,
185        self_call_count: 0,
186        has_listener_field: false,
187        has_notify_method: false,
188    })
189}
190
191fn count_struct_fields(node: Node) -> usize {
192    let mut count = 0;
193    let mut cursor = node.walk();
194    visit_all(node, &mut cursor, &mut |n| {
195        if n.kind() == "field_declaration" {
196            count += 1;
197        }
198    });
199    count
200}
201
202fn collect_imports(node: Node, src: &[u8], imports: &mut Vec<ImportInfo>) {
203    let mut cursor = node.walk();
204    visit_all(node, &mut cursor, &mut |n| {
205        if n.kind() == "import_spec" {
206            let line = n.start_position().row + 1;
207            let col = n.start_position().column;
208            let path_node = n.child_by_field_name("path").unwrap_or(n);
209            let text = node_text(path_node, src).trim_matches('"').to_string();
210            if !text.is_empty() {
211                imports.push(ImportInfo {
212                    source: text,
213                    line,
214                    col,
215                    ..Default::default()
216                });
217            }
218        }
219    });
220}
221
222fn extract_params(
223    params: Node,
224    src: &[u8],
225    imports_map: &crate::type_ref::ImportsMap,
226) -> (usize, Vec<cha_core::TypeRef>) {
227    let mut count = 0;
228    let mut types = Vec::new();
229    let mut cursor = params.walk();
230    for child in params.children(&mut cursor) {
231        if child.kind() == "parameter_declaration" {
232            let raw = child
233                .child_by_field_name("type")
234                .map(|t| node_text(t, src).to_string())
235                .unwrap_or_else(|| "any".into());
236            // Count names in this declaration (e.g. `a, b int` = 2 params)
237            let mut inner = child.walk();
238            let names: usize = child
239                .children(&mut inner)
240                .filter(|c| c.kind() == "identifier")
241                .count()
242                .max(1);
243            for _ in 0..names {
244                count += 1;
245                types.push(resolve_go_type(&raw, imports_map));
246            }
247        }
248    }
249    (count, types)
250}
251
252/// Go types are `pkg.TypeName` or `*pkg.TypeName`; the importable name in
253/// ImportsMap is the package alias. Split on `.`, look up the first segment.
254fn resolve_go_type(raw: &str, imports_map: &crate::type_ref::ImportsMap) -> cha_core::TypeRef {
255    // Strip decorations to get the inner `pkg.Type` or `Type`.
256    let inner = raw.trim_start_matches('*').trim_start_matches('[').trim();
257    let inner = inner.trim_start_matches(']').trim();
258    let mut parts = inner.splitn(2, '.');
259    let first = parts.next().unwrap_or(inner);
260    let second = parts.next();
261    let (short_name, origin) = if let Some(type_part) = second {
262        let origin = imports_map
263            .get(first)
264            .cloned()
265            .unwrap_or(cha_core::TypeOrigin::Unknown);
266        (type_part.to_string(), origin)
267    } else {
268        // No `.` → builtin type (string, int, bool, etc.) or locally-declared
269        // type. Treat builtin primitives accordingly; everything else → Local.
270        let origin = if is_go_builtin(inner) {
271            cha_core::TypeOrigin::Primitive
272        } else {
273            cha_core::TypeOrigin::Local
274        };
275        (inner.to_string(), origin)
276    };
277    cha_core::TypeRef {
278        name: short_name,
279        raw: raw.to_string(),
280        origin,
281    }
282}
283
/// Reports whether `name` is exactly one of the Go type names this parser
/// treats as primitive (the predeclared types plus `any` and `interface{}`).
///
/// The comparison is exact: no trimming or case folding is applied.
fn is_go_builtin(name: &str) -> bool {
    const PRIMITIVE_NAMES: &[&str] = &[
        "bool",
        "byte",
        "complex64",
        "complex128",
        "error",
        "float32",
        "float64",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "rune",
        "string",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
        "any",
        "interface{}",
    ];
    PRIMITIVE_NAMES.contains(&name)
}
311
312fn count_complexity(node: Node) -> usize {
313    let mut c = 1usize;
314    let mut cursor = node.walk();
315    visit_all(node, &mut cursor, &mut |n| match n.kind() {
316        "if_statement" | "for_statement" | "expression_case" | "default_case" | "type_case"
317        | "select_statement" | "go_statement" => c += 1,
318        "binary_expression" => {
319            if let Some(op) = n.child_by_field_name("operator") {
320                let kind = op.kind();
321                if kind == "&&" || kind == "||" {
322                    c += 1;
323                }
324            }
325        }
326        _ => {}
327    });
328    c
329}
330
331fn max_chain_depth(node: Node) -> usize {
332    let mut max = 0;
333    let mut cursor = node.walk();
334    visit_all(node, &mut cursor, &mut |n| {
335        if n.kind() == "selector_expression" {
336            let d = chain_len(n);
337            if d > max {
338                max = d;
339            }
340        }
341    });
342    max
343}
344
345fn chain_len(node: Node) -> usize {
346    let mut depth = 0;
347    let mut current = node;
348    while current.kind() == "selector_expression" || current.kind() == "call_expression" {
349        if current.kind() == "selector_expression" {
350            depth += 1;
351        }
352        match current.child(0) {
353            Some(c) => current = c,
354            None => break,
355        }
356    }
357    depth
358}
359
360fn count_case_clauses(node: Node) -> usize {
361    let mut count = 0;
362    let mut cursor = node.walk();
363    visit_all(node, &mut cursor, &mut |n| {
364        if n.kind() == "expression_case" || n.kind() == "default_case" || n.kind() == "type_case" {
365            count += 1;
366        }
367    });
368    count
369}
370
371fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
372    let mut refs = Vec::new();
373    let mut cursor = node.walk();
374    visit_all(node, &mut cursor, &mut |n| {
375        if n.kind() == "selector_expression"
376            && let Some(obj) = n.child(0)
377            && obj.kind() == "identifier"
378        {
379            let text = node_text(obj, src).to_string();
380            if !refs.contains(&text) {
381                refs.push(text);
382            }
383        }
384    });
385    refs
386}
387
388fn check_delegating(body: Node, src: &[u8]) -> bool {
389    let mut cursor = body.walk();
390    let stmts: Vec<Node> = body
391        .children(&mut cursor)
392        .filter(|n| n.kind() != "{" && n.kind() != "}" && n.kind() != "comment")
393        .collect();
394    if stmts.len() != 1 {
395        return false;
396    }
397    let stmt = stmts[0];
398    let call = match stmt.kind() {
399        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
400        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
401        _ => None,
402    };
403    call.and_then(|c| c.child(0))
404        .is_some_and(|f| node_text(f, src).contains('.'))
405}
406
407fn count_comment_lines(node: Node, src: &[u8]) -> usize {
408    let mut count = 0;
409    let mut cursor = node.walk();
410    visit_all(node, &mut cursor, &mut |n| {
411        if n.kind() == "comment" {
412            count += node_text(n, src).lines().count();
413        }
414    });
415    count
416}
417
418fn collect_nil_checks(body: Node, src: &[u8]) -> Vec<String> {
419    let mut fields = Vec::new();
420    let mut cursor = body.walk();
421    visit_all(body, &mut cursor, &mut |n| {
422        if n.kind() != "binary_expression" {
423            return;
424        }
425        let text = node_text(n, src);
426        if !text.contains("nil") {
427            return;
428        }
429        if let Some(left) = n.child(0) {
430            let name = node_text(left, src).to_string();
431            if !fields.contains(&name) {
432                fields.push(name);
433            }
434        }
435    });
436    fields
437}
438
439fn hash_ast(node: Node) -> u64 {
440    let mut hasher = DefaultHasher::new();
441    hash_node(node, &mut hasher);
442    hasher.finish()
443}
444
445fn hash_node(node: Node, hasher: &mut DefaultHasher) {
446    node.kind().hash(hasher);
447    let mut cursor = node.walk();
448    for child in node.children(&mut cursor) {
449        hash_node(child, hasher);
450    }
451}
452
453fn cognitive_complexity_go(node: Node) -> usize {
454    let mut score = 0;
455    cc_walk(node, 0, &mut score);
456    score
457}
458
459fn cc_walk(node: Node, nesting: usize, score: &mut usize) {
460    match node.kind() {
461        "if_statement" => {
462            *score += 1 + nesting;
463            cc_children(node, nesting + 1, score);
464            return;
465        }
466        "for_statement" => {
467            *score += 1 + nesting;
468            cc_children(node, nesting + 1, score);
469            return;
470        }
471        "expression_switch_statement" | "type_switch_statement" | "select_statement" => {
472            *score += 1 + nesting;
473            cc_children(node, nesting + 1, score);
474            return;
475        }
476        "else_clause" => {
477            *score += 1; // no nesting increment for else
478        }
479        "binary_expression" => {
480            if let Some(op) = node.child_by_field_name("operator")
481                && (op.kind() == "&&" || op.kind() == "||")
482            {
483                *score += 1;
484            }
485        }
486        _ => {}
487    }
488    cc_children(node, nesting, score);
489}
490
491fn cc_children(node: Node, nesting: usize, score: &mut usize) {
492    let mut cursor = node.walk();
493    for child in node.children(&mut cursor) {
494        cc_walk(child, nesting, score);
495    }
496}
497
498fn collect_field_refs_go(body: Node, src: &[u8]) -> Vec<String> {
499    let mut refs = Vec::new();
500    let mut cursor = body.walk();
501    visit_all(body, &mut cursor, &mut |n| {
502        if n.kind() == "selector_expression"
503            && let Some(field) = n.child_by_field_name("field")
504        {
505            let name = node_text(field, src).to_string();
506            if !refs.contains(&name) {
507                refs.push(name);
508            }
509        }
510    });
511    refs
512}
513
514fn extract_switch_target_go(body: Node, src: &[u8]) -> Option<String> {
515    let mut target = None;
516    let mut cursor = body.walk();
517    visit_all(body, &mut cursor, &mut |n| {
518        if (n.kind() == "expression_switch_statement" || n.kind() == "type_switch_statement")
519            && target.is_none()
520            && let Some(val) = n.child_by_field_name("value")
521        {
522            target = Some(node_text(val, src).to_string());
523        }
524    });
525    target
526}
527
528fn collect_calls(body: Option<Node>, src: &[u8]) -> Vec<String> {
529    let Some(body) = body else { return Vec::new() };
530    let mut calls = Vec::new();
531    let mut cursor = body.walk();
532    visit_all(body, &mut cursor, &mut |n| {
533        if n.kind() == "call_expression"
534            && let Some(func) = n.child(0)
535        {
536            let name = node_text(func, src).to_string();
537            if !calls.contains(&name) {
538                calls.push(name);
539            }
540        }
541    });
542    calls
543}
544
545fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
546    node.utf8_text(src).unwrap_or("")
547}
548
549fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
550    let mut comments = Vec::new();
551    let mut cursor = root.walk();
552    visit_all(root, &mut cursor, &mut |n| {
553        if n.kind().contains("comment") {
554            comments.push(cha_core::CommentInfo {
555                text: node_text(n, src).to_string(),
556                line: n.start_position().row + 1,
557            });
558        }
559    });
560    comments
561}
562
563fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
564    f(node);
565    if cursor.goto_first_child() {
566        loop {
567            let child_node = cursor.node();
568            let mut child_cursor = child_node.walk();
569            visit_all(child_node, &mut child_cursor, f);
570            if !cursor.goto_next_sibling() {
571                break;
572            }
573        }
574        cursor.goto_parent();
575    }
576}