Skip to main content

cha_parser/
golang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
/// Stateless [`LanguageParser`] implementation for Go source files.
///
/// The type carries no data; a fresh tree-sitter parser is created on every
/// call to `parse`, so values are free to copy and share.
#[derive(Debug, Clone, Copy, Default)]
pub struct GolangParser;
10
11impl LanguageParser for GolangParser {
12    fn language_name(&self) -> &str {
13        "go"
14    }
15
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        let mut parser = Parser::new();
18        parser.set_language(&tree_sitter_go::LANGUAGE.into()).ok()?;
19        let tree = parser.parse(&file.content, None)?;
20        let root = tree.root_node();
21        let src = file.content.as_bytes();
22
23        let mut functions = Vec::new();
24        let mut classes = Vec::new();
25        let mut imports = Vec::new();
26
27        let imports_map = crate::golang_imports::build(root, src, &file.path);
28        collect_top_level(
29            root,
30            src,
31            &imports_map,
32            &mut functions,
33            &mut classes,
34            &mut imports,
35        );
36
37        Some(SourceModel {
38            language: "go".into(),
39            total_lines: file.line_count(),
40            functions,
41            classes,
42            imports,
43            comments: collect_comments(root, src),
44            type_aliases: vec![], // TODO(parser): extract type aliases from 'type X = Y' declarations
45        })
46    }
47}
48
49fn collect_top_level(
50    root: Node,
51    src: &[u8],
52    imports_map: &crate::type_ref::ImportsMap,
53    functions: &mut Vec<FunctionInfo>,
54    classes: &mut Vec<ClassInfo>,
55    imports: &mut Vec<ImportInfo>,
56) {
57    let mut cursor = root.walk();
58    for child in root.children(&mut cursor) {
59        match child.kind() {
60            "function_declaration" | "method_declaration" => {
61                if let Some(f) = extract_function(child, src, imports_map) {
62                    functions.push(f);
63                }
64            }
65            "type_declaration" => extract_type_decl(child, src, classes),
66            "import_declaration" => collect_imports(child, src, imports),
67            _ => {}
68        }
69    }
70}
71
72fn extract_function(
73    node: Node,
74    src: &[u8],
75    imports_map: &crate::type_ref::ImportsMap,
76) -> Option<FunctionInfo> {
77    let name_node = node.child_by_field_name("name")?;
78    let name = node_text(name_node, src).to_string();
79    let name_col = name_node.start_position().column;
80    let name_end_col = name_node.end_position().column;
81    let start_line = node.start_position().row + 1;
82    let end_line = node.end_position().row + 1;
83    let body = node.child_by_field_name("body");
84    let params = node.child_by_field_name("parameters");
85    let (param_count, param_types) = params
86        .map(|p| extract_params(p, src, imports_map))
87        .unwrap_or((0, vec![]));
88    let is_exported = name.starts_with(|c: char| c.is_uppercase());
89
90    Some(FunctionInfo {
91        name,
92        start_line,
93        end_line,
94        name_col,
95        name_end_col,
96        line_count: end_line - start_line + 1,
97        complexity: count_complexity(node),
98        body_hash: body.map(hash_ast),
99        is_exported,
100        parameter_count: param_count,
101        parameter_types: param_types,
102        chain_depth: body.map(max_chain_depth).unwrap_or(0),
103        switch_arms: body.map(count_case_clauses).unwrap_or(0),
104        external_refs: body
105            .map(|b| collect_external_refs(b, src))
106            .unwrap_or_default(),
107        is_delegating: body.map(|b| check_delegating(b, src)).unwrap_or(false),
108        comment_lines: count_comment_lines(node, src),
109        referenced_fields: body
110            .map(|b| collect_field_refs_go(b, src))
111            .unwrap_or_default(),
112        null_check_fields: body.map(|b| collect_nil_checks(b, src)).unwrap_or_default(),
113        switch_dispatch_target: body.and_then(|b| extract_switch_target_go(b, src)),
114        optional_param_count: 0,
115        called_functions: collect_calls(body, src),
116        cognitive_complexity: body.map(|b| cognitive_complexity_go(b)).unwrap_or(0),
117    })
118}
119
120fn extract_type_decl(node: Node, src: &[u8], classes: &mut Vec<ClassInfo>) {
121    let mut cursor = node.walk();
122    for child in node.children(&mut cursor) {
123        if child.kind() == "type_spec"
124            && let Some(c) = extract_struct(child, src)
125        {
126            classes.push(c);
127        }
128    }
129}
130
131fn extract_struct(node: Node, src: &[u8]) -> Option<ClassInfo> {
132    let name_node = node.child_by_field_name("name")?;
133    let name = node_text(name_node, src).to_string();
134    let name_col = name_node.start_position().column;
135    let name_end_col = name_node.end_position().column;
136    let type_node = node.child_by_field_name("type")?;
137    if type_node.kind() != "struct_type" && type_node.kind() != "interface_type" {
138        return None;
139    }
140    let is_interface = type_node.kind() == "interface_type";
141    let start_line = node.start_position().row + 1;
142    let end_line = node.end_position().row + 1;
143    let field_count = count_struct_fields(type_node);
144    let is_exported = name.starts_with(|c: char| c.is_uppercase());
145
146    Some(ClassInfo {
147        name,
148        start_line,
149        end_line,
150        name_col,
151        name_end_col,
152        line_count: end_line - start_line + 1,
153        method_count: 0,
154        is_exported,
155        delegating_method_count: 0,
156        field_count,
157        field_names: Vec::new(),
158        field_types: Vec::new(),
159        has_behavior: false,
160        is_interface,
161        parent_name: None,
162        override_count: 0,
163        self_call_count: 0,
164        has_listener_field: false,
165        has_notify_method: false,
166    })
167}
168
169fn count_struct_fields(node: Node) -> usize {
170    let mut count = 0;
171    let mut cursor = node.walk();
172    visit_all(node, &mut cursor, &mut |n| {
173        if n.kind() == "field_declaration" {
174            count += 1;
175        }
176    });
177    count
178}
179
180fn collect_imports(node: Node, src: &[u8], imports: &mut Vec<ImportInfo>) {
181    let mut cursor = node.walk();
182    visit_all(node, &mut cursor, &mut |n| {
183        if n.kind() == "import_spec" {
184            let line = n.start_position().row + 1;
185            let col = n.start_position().column;
186            let path_node = n.child_by_field_name("path").unwrap_or(n);
187            let text = node_text(path_node, src).trim_matches('"').to_string();
188            if !text.is_empty() {
189                imports.push(ImportInfo {
190                    source: text,
191                    line,
192                    col,
193                    ..Default::default()
194                });
195            }
196        }
197    });
198}
199
200fn extract_params(
201    params: Node,
202    src: &[u8],
203    imports_map: &crate::type_ref::ImportsMap,
204) -> (usize, Vec<cha_core::TypeRef>) {
205    let mut count = 0;
206    let mut types = Vec::new();
207    let mut cursor = params.walk();
208    for child in params.children(&mut cursor) {
209        if child.kind() == "parameter_declaration" {
210            let raw = child
211                .child_by_field_name("type")
212                .map(|t| node_text(t, src).to_string())
213                .unwrap_or_else(|| "any".into());
214            // Count names in this declaration (e.g. `a, b int` = 2 params)
215            let mut inner = child.walk();
216            let names: usize = child
217                .children(&mut inner)
218                .filter(|c| c.kind() == "identifier")
219                .count()
220                .max(1);
221            for _ in 0..names {
222                count += 1;
223                types.push(resolve_go_type(&raw, imports_map));
224            }
225        }
226    }
227    (count, types)
228}
229
230/// Go types are `pkg.TypeName` or `*pkg.TypeName`; the importable name in
231/// ImportsMap is the package alias. Split on `.`, look up the first segment.
232fn resolve_go_type(raw: &str, imports_map: &crate::type_ref::ImportsMap) -> cha_core::TypeRef {
233    // Strip decorations to get the inner `pkg.Type` or `Type`.
234    let inner = raw.trim_start_matches('*').trim_start_matches('[').trim();
235    let inner = inner.trim_start_matches(']').trim();
236    let mut parts = inner.splitn(2, '.');
237    let first = parts.next().unwrap_or(inner);
238    let second = parts.next();
239    let (short_name, origin) = if let Some(type_part) = second {
240        let origin = imports_map
241            .get(first)
242            .cloned()
243            .unwrap_or(cha_core::TypeOrigin::Unknown);
244        (type_part.to_string(), origin)
245    } else {
246        // No `.` → builtin type (string, int, bool, etc.) or locally-declared
247        // type. Treat builtin primitives accordingly; everything else → Local.
248        let origin = if is_go_builtin(inner) {
249            cha_core::TypeOrigin::Primitive
250        } else {
251            cha_core::TypeOrigin::Local
252        };
253        (inner.to_string(), origin)
254    };
255    cha_core::TypeRef {
256        name: short_name,
257        raw: raw.to_string(),
258        origin,
259    }
260}
261
/// Reports whether `name` is exactly one of the predeclared Go type names
/// recognised by this parser (numeric, string and boolean types, `error`,
/// `any`, and the literal spelling `interface{}`).
fn is_go_builtin(name: &str) -> bool {
    const PREDECLARED: &[&str] = &[
        "bool",
        "byte",
        "complex64",
        "complex128",
        "error",
        "float32",
        "float64",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "rune",
        "string",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
        "any",
        "interface{}",
    ];
    PREDECLARED.contains(&name)
}
289
290fn count_complexity(node: Node) -> usize {
291    let mut c = 1usize;
292    let mut cursor = node.walk();
293    visit_all(node, &mut cursor, &mut |n| match n.kind() {
294        "if_statement" | "for_statement" | "expression_case" | "default_case" | "type_case"
295        | "select_statement" | "go_statement" => c += 1,
296        "binary_expression" => {
297            if let Some(op) = n.child_by_field_name("operator") {
298                let kind = op.kind();
299                if kind == "&&" || kind == "||" {
300                    c += 1;
301                }
302            }
303        }
304        _ => {}
305    });
306    c
307}
308
309fn max_chain_depth(node: Node) -> usize {
310    let mut max = 0;
311    let mut cursor = node.walk();
312    visit_all(node, &mut cursor, &mut |n| {
313        if n.kind() == "selector_expression" {
314            let d = chain_len(n);
315            if d > max {
316                max = d;
317            }
318        }
319    });
320    max
321}
322
323fn chain_len(node: Node) -> usize {
324    let mut depth = 0;
325    let mut current = node;
326    while current.kind() == "selector_expression" || current.kind() == "call_expression" {
327        if current.kind() == "selector_expression" {
328            depth += 1;
329        }
330        match current.child(0) {
331            Some(c) => current = c,
332            None => break,
333        }
334    }
335    depth
336}
337
338fn count_case_clauses(node: Node) -> usize {
339    let mut count = 0;
340    let mut cursor = node.walk();
341    visit_all(node, &mut cursor, &mut |n| {
342        if n.kind() == "expression_case" || n.kind() == "default_case" || n.kind() == "type_case" {
343            count += 1;
344        }
345    });
346    count
347}
348
349fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
350    let mut refs = Vec::new();
351    let mut cursor = node.walk();
352    visit_all(node, &mut cursor, &mut |n| {
353        if n.kind() == "selector_expression"
354            && let Some(obj) = n.child(0)
355            && obj.kind() == "identifier"
356        {
357            let text = node_text(obj, src).to_string();
358            if !refs.contains(&text) {
359                refs.push(text);
360            }
361        }
362    });
363    refs
364}
365
366fn check_delegating(body: Node, src: &[u8]) -> bool {
367    let mut cursor = body.walk();
368    let stmts: Vec<Node> = body
369        .children(&mut cursor)
370        .filter(|n| n.kind() != "{" && n.kind() != "}" && n.kind() != "comment")
371        .collect();
372    if stmts.len() != 1 {
373        return false;
374    }
375    let stmt = stmts[0];
376    let call = match stmt.kind() {
377        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
378        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
379        _ => None,
380    };
381    call.and_then(|c| c.child(0))
382        .is_some_and(|f| node_text(f, src).contains('.'))
383}
384
385fn count_comment_lines(node: Node, src: &[u8]) -> usize {
386    let mut count = 0;
387    let mut cursor = node.walk();
388    visit_all(node, &mut cursor, &mut |n| {
389        if n.kind() == "comment" {
390            count += node_text(n, src).lines().count();
391        }
392    });
393    count
394}
395
396fn collect_nil_checks(body: Node, src: &[u8]) -> Vec<String> {
397    let mut fields = Vec::new();
398    let mut cursor = body.walk();
399    visit_all(body, &mut cursor, &mut |n| {
400        if n.kind() != "binary_expression" {
401            return;
402        }
403        let text = node_text(n, src);
404        if !text.contains("nil") {
405            return;
406        }
407        if let Some(left) = n.child(0) {
408            let name = node_text(left, src).to_string();
409            if !fields.contains(&name) {
410                fields.push(name);
411            }
412        }
413    });
414    fields
415}
416
417fn hash_ast(node: Node) -> u64 {
418    let mut hasher = DefaultHasher::new();
419    hash_node(node, &mut hasher);
420    hasher.finish()
421}
422
423fn hash_node(node: Node, hasher: &mut DefaultHasher) {
424    node.kind().hash(hasher);
425    let mut cursor = node.walk();
426    for child in node.children(&mut cursor) {
427        hash_node(child, hasher);
428    }
429}
430
431fn cognitive_complexity_go(node: Node) -> usize {
432    let mut score = 0;
433    cc_walk(node, 0, &mut score);
434    score
435}
436
437fn cc_walk(node: Node, nesting: usize, score: &mut usize) {
438    match node.kind() {
439        "if_statement" => {
440            *score += 1 + nesting;
441            cc_children(node, nesting + 1, score);
442            return;
443        }
444        "for_statement" => {
445            *score += 1 + nesting;
446            cc_children(node, nesting + 1, score);
447            return;
448        }
449        "expression_switch_statement" | "type_switch_statement" | "select_statement" => {
450            *score += 1 + nesting;
451            cc_children(node, nesting + 1, score);
452            return;
453        }
454        "else_clause" => {
455            *score += 1; // no nesting increment for else
456        }
457        "binary_expression" => {
458            if let Some(op) = node.child_by_field_name("operator")
459                && (op.kind() == "&&" || op.kind() == "||")
460            {
461                *score += 1;
462            }
463        }
464        _ => {}
465    }
466    cc_children(node, nesting, score);
467}
468
469fn cc_children(node: Node, nesting: usize, score: &mut usize) {
470    let mut cursor = node.walk();
471    for child in node.children(&mut cursor) {
472        cc_walk(child, nesting, score);
473    }
474}
475
476fn collect_field_refs_go(body: Node, src: &[u8]) -> Vec<String> {
477    let mut refs = Vec::new();
478    let mut cursor = body.walk();
479    visit_all(body, &mut cursor, &mut |n| {
480        if n.kind() == "selector_expression"
481            && let Some(field) = n.child_by_field_name("field")
482        {
483            let name = node_text(field, src).to_string();
484            if !refs.contains(&name) {
485                refs.push(name);
486            }
487        }
488    });
489    refs
490}
491
492fn extract_switch_target_go(body: Node, src: &[u8]) -> Option<String> {
493    let mut target = None;
494    let mut cursor = body.walk();
495    visit_all(body, &mut cursor, &mut |n| {
496        if (n.kind() == "expression_switch_statement" || n.kind() == "type_switch_statement")
497            && target.is_none()
498            && let Some(val) = n.child_by_field_name("value")
499        {
500            target = Some(node_text(val, src).to_string());
501        }
502    });
503    target
504}
505
506fn collect_calls(body: Option<Node>, src: &[u8]) -> Vec<String> {
507    let Some(body) = body else { return Vec::new() };
508    let mut calls = Vec::new();
509    let mut cursor = body.walk();
510    visit_all(body, &mut cursor, &mut |n| {
511        if n.kind() == "call_expression"
512            && let Some(func) = n.child(0)
513        {
514            let name = node_text(func, src).to_string();
515            if !calls.contains(&name) {
516                calls.push(name);
517            }
518        }
519    });
520    calls
521}
522
523fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
524    node.utf8_text(src).unwrap_or("")
525}
526
527fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
528    let mut comments = Vec::new();
529    let mut cursor = root.walk();
530    visit_all(root, &mut cursor, &mut |n| {
531        if n.kind().contains("comment") {
532            comments.push(cha_core::CommentInfo {
533                text: node_text(n, src).to_string(),
534                line: n.start_position().row + 1,
535            });
536        }
537    });
538    comments
539}
540
541fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
542    f(node);
543    if cursor.goto_first_child() {
544        loop {
545            let child_node = cursor.node();
546            let mut child_cursor = child_node.walk();
547            visit_all(child_node, &mut child_cursor, f);
548            if !cursor.goto_next_sibling() {
549                break;
550            }
551        }
552        cursor.goto_parent();
553    }
554}