Skip to main content

cha_parser/
golang.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3
4use cha_core::{ClassInfo, FunctionInfo, ImportInfo, SourceFile, SourceModel};
5use tree_sitter::{Node, Parser};
6
7use crate::LanguageParser;
8
9pub struct GolangParser;
10
11impl LanguageParser for GolangParser {
12    fn language_name(&self) -> &str {
13        "go"
14    }
15
16    fn parse(&self, file: &SourceFile) -> Option<SourceModel> {
17        let mut parser = Parser::new();
18        parser.set_language(&tree_sitter_go::LANGUAGE.into()).ok()?;
19        let tree = parser.parse(&file.content, None)?;
20        let root = tree.root_node();
21        let src = file.content.as_bytes();
22
23        let mut functions = Vec::new();
24        let mut classes = Vec::new();
25        let mut imports = Vec::new();
26
27        let imports_map = crate::golang_imports::build(root, src, &file.path);
28        collect_top_level(
29            root,
30            src,
31            &imports_map,
32            &mut functions,
33            &mut classes,
34            &mut imports,
35        );
36
37        Some(SourceModel {
38            language: "go".into(),
39            total_lines: file.line_count(),
40            functions,
41            classes,
42            imports,
43            comments: collect_comments(root, src),
44            type_aliases: vec![], // TODO(parser): extract type aliases from 'type X = Y' declarations
45        })
46    }
47}
48
49fn collect_top_level(
50    root: Node,
51    src: &[u8],
52    imports_map: &crate::type_ref::ImportsMap,
53    functions: &mut Vec<FunctionInfo>,
54    classes: &mut Vec<ClassInfo>,
55    imports: &mut Vec<ImportInfo>,
56) {
57    let mut cursor = root.walk();
58    for child in root.children(&mut cursor) {
59        match child.kind() {
60            "function_declaration" | "method_declaration" => {
61                if let Some(f) = extract_function(child, src, imports_map) {
62                    functions.push(f);
63                }
64            }
65            "type_declaration" => extract_type_decl(child, src, classes),
66            "import_declaration" => collect_imports(child, src, imports),
67            _ => {}
68        }
69    }
70}
71
72fn extract_function(
73    node: Node,
74    src: &[u8],
75    imports_map: &crate::type_ref::ImportsMap,
76) -> Option<FunctionInfo> {
77    let name_node = node.child_by_field_name("name")?;
78    let name = node_text(name_node, src).to_string();
79    let name_col = name_node.start_position().column;
80    let name_end_col = name_node.end_position().column;
81    let start_line = node.start_position().row + 1;
82    let end_line = node.end_position().row + 1;
83    let body = node.child_by_field_name("body");
84    let params = node.child_by_field_name("parameters");
85    let (param_count, param_types) = params
86        .map(|p| extract_params(p, src, imports_map))
87        .unwrap_or((0, vec![]));
88    let is_exported = name.starts_with(|c: char| c.is_uppercase());
89
90    Some(FunctionInfo {
91        name,
92        start_line,
93        end_line,
94        name_col,
95        name_end_col,
96        line_count: end_line - start_line + 1,
97        complexity: count_complexity(node),
98        body_hash: body.map(hash_ast),
99        is_exported,
100        parameter_count: param_count,
101        parameter_types: param_types,
102        chain_depth: body.map(max_chain_depth).unwrap_or(0),
103        switch_arms: body.map(count_case_clauses).unwrap_or(0),
104        external_refs: body
105            .map(|b| collect_external_refs(b, src))
106            .unwrap_or_default(),
107        is_delegating: body.map(|b| check_delegating(b, src)).unwrap_or(false),
108        comment_lines: count_comment_lines(node, src),
109        referenced_fields: body
110            .map(|b| collect_field_refs_go(b, src))
111            .unwrap_or_default(),
112        null_check_fields: body.map(|b| collect_nil_checks(b, src)).unwrap_or_default(),
113        switch_dispatch_target: body.and_then(|b| extract_switch_target_go(b, src)),
114        optional_param_count: 0,
115        called_functions: collect_calls(body, src),
116        cognitive_complexity: body.map(|b| cognitive_complexity_go(b)).unwrap_or(0),
117        return_type: node
118            .child_by_field_name("result")
119            .map(|rt| crate::type_ref::resolve(node_text(rt, src), imports_map)),
120    })
121}
122
123fn extract_type_decl(node: Node, src: &[u8], classes: &mut Vec<ClassInfo>) {
124    let mut cursor = node.walk();
125    for child in node.children(&mut cursor) {
126        if child.kind() == "type_spec"
127            && let Some(c) = extract_struct(child, src)
128        {
129            classes.push(c);
130        }
131    }
132}
133
134fn extract_struct(node: Node, src: &[u8]) -> Option<ClassInfo> {
135    let name_node = node.child_by_field_name("name")?;
136    let name = node_text(name_node, src).to_string();
137    let name_col = name_node.start_position().column;
138    let name_end_col = name_node.end_position().column;
139    let type_node = node.child_by_field_name("type")?;
140    if type_node.kind() != "struct_type" && type_node.kind() != "interface_type" {
141        return None;
142    }
143    let is_interface = type_node.kind() == "interface_type";
144    let start_line = node.start_position().row + 1;
145    let end_line = node.end_position().row + 1;
146    let field_count = count_struct_fields(type_node);
147    let is_exported = name.starts_with(|c: char| c.is_uppercase());
148
149    Some(ClassInfo {
150        name,
151        start_line,
152        end_line,
153        name_col,
154        name_end_col,
155        line_count: end_line - start_line + 1,
156        method_count: 0,
157        is_exported,
158        delegating_method_count: 0,
159        field_count,
160        field_names: Vec::new(),
161        field_types: Vec::new(),
162        has_behavior: false,
163        is_interface,
164        parent_name: None,
165        override_count: 0,
166        self_call_count: 0,
167        has_listener_field: false,
168        has_notify_method: false,
169    })
170}
171
172fn count_struct_fields(node: Node) -> usize {
173    let mut count = 0;
174    let mut cursor = node.walk();
175    visit_all(node, &mut cursor, &mut |n| {
176        if n.kind() == "field_declaration" {
177            count += 1;
178        }
179    });
180    count
181}
182
183fn collect_imports(node: Node, src: &[u8], imports: &mut Vec<ImportInfo>) {
184    let mut cursor = node.walk();
185    visit_all(node, &mut cursor, &mut |n| {
186        if n.kind() == "import_spec" {
187            let line = n.start_position().row + 1;
188            let col = n.start_position().column;
189            let path_node = n.child_by_field_name("path").unwrap_or(n);
190            let text = node_text(path_node, src).trim_matches('"').to_string();
191            if !text.is_empty() {
192                imports.push(ImportInfo {
193                    source: text,
194                    line,
195                    col,
196                    ..Default::default()
197                });
198            }
199        }
200    });
201}
202
203fn extract_params(
204    params: Node,
205    src: &[u8],
206    imports_map: &crate::type_ref::ImportsMap,
207) -> (usize, Vec<cha_core::TypeRef>) {
208    let mut count = 0;
209    let mut types = Vec::new();
210    let mut cursor = params.walk();
211    for child in params.children(&mut cursor) {
212        if child.kind() == "parameter_declaration" {
213            let raw = child
214                .child_by_field_name("type")
215                .map(|t| node_text(t, src).to_string())
216                .unwrap_or_else(|| "any".into());
217            // Count names in this declaration (e.g. `a, b int` = 2 params)
218            let mut inner = child.walk();
219            let names: usize = child
220                .children(&mut inner)
221                .filter(|c| c.kind() == "identifier")
222                .count()
223                .max(1);
224            for _ in 0..names {
225                count += 1;
226                types.push(resolve_go_type(&raw, imports_map));
227            }
228        }
229    }
230    (count, types)
231}
232
233/// Go types are `pkg.TypeName` or `*pkg.TypeName`; the importable name in
234/// ImportsMap is the package alias. Split on `.`, look up the first segment.
235fn resolve_go_type(raw: &str, imports_map: &crate::type_ref::ImportsMap) -> cha_core::TypeRef {
236    // Strip decorations to get the inner `pkg.Type` or `Type`.
237    let inner = raw.trim_start_matches('*').trim_start_matches('[').trim();
238    let inner = inner.trim_start_matches(']').trim();
239    let mut parts = inner.splitn(2, '.');
240    let first = parts.next().unwrap_or(inner);
241    let second = parts.next();
242    let (short_name, origin) = if let Some(type_part) = second {
243        let origin = imports_map
244            .get(first)
245            .cloned()
246            .unwrap_or(cha_core::TypeOrigin::Unknown);
247        (type_part.to_string(), origin)
248    } else {
249        // No `.` → builtin type (string, int, bool, etc.) or locally-declared
250        // type. Treat builtin primitives accordingly; everything else → Local.
251        let origin = if is_go_builtin(inner) {
252            cha_core::TypeOrigin::Primitive
253        } else {
254            cha_core::TypeOrigin::Local
255        };
256        (inner.to_string(), origin)
257    };
258    cha_core::TypeRef {
259        name: short_name,
260        raw: raw.to_string(),
261        origin,
262    }
263}
264
/// Reports whether `name` is one of the Go builtin type names recognised by
/// this parser (exact, case-sensitive match).
fn is_go_builtin(name: &str) -> bool {
    const BUILTINS: &[&str] = &[
        "bool",
        "byte",
        "complex64",
        "complex128",
        "error",
        "float32",
        "float64",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "rune",
        "string",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
        "any",
        "interface{}",
    ];
    BUILTINS.contains(&name)
}
292
293fn count_complexity(node: Node) -> usize {
294    let mut c = 1usize;
295    let mut cursor = node.walk();
296    visit_all(node, &mut cursor, &mut |n| match n.kind() {
297        "if_statement" | "for_statement" | "expression_case" | "default_case" | "type_case"
298        | "select_statement" | "go_statement" => c += 1,
299        "binary_expression" => {
300            if let Some(op) = n.child_by_field_name("operator") {
301                let kind = op.kind();
302                if kind == "&&" || kind == "||" {
303                    c += 1;
304                }
305            }
306        }
307        _ => {}
308    });
309    c
310}
311
312fn max_chain_depth(node: Node) -> usize {
313    let mut max = 0;
314    let mut cursor = node.walk();
315    visit_all(node, &mut cursor, &mut |n| {
316        if n.kind() == "selector_expression" {
317            let d = chain_len(n);
318            if d > max {
319                max = d;
320            }
321        }
322    });
323    max
324}
325
326fn chain_len(node: Node) -> usize {
327    let mut depth = 0;
328    let mut current = node;
329    while current.kind() == "selector_expression" || current.kind() == "call_expression" {
330        if current.kind() == "selector_expression" {
331            depth += 1;
332        }
333        match current.child(0) {
334            Some(c) => current = c,
335            None => break,
336        }
337    }
338    depth
339}
340
341fn count_case_clauses(node: Node) -> usize {
342    let mut count = 0;
343    let mut cursor = node.walk();
344    visit_all(node, &mut cursor, &mut |n| {
345        if n.kind() == "expression_case" || n.kind() == "default_case" || n.kind() == "type_case" {
346            count += 1;
347        }
348    });
349    count
350}
351
352fn collect_external_refs(node: Node, src: &[u8]) -> Vec<String> {
353    let mut refs = Vec::new();
354    let mut cursor = node.walk();
355    visit_all(node, &mut cursor, &mut |n| {
356        if n.kind() == "selector_expression"
357            && let Some(obj) = n.child(0)
358            && obj.kind() == "identifier"
359        {
360            let text = node_text(obj, src).to_string();
361            if !refs.contains(&text) {
362                refs.push(text);
363            }
364        }
365    });
366    refs
367}
368
369fn check_delegating(body: Node, src: &[u8]) -> bool {
370    let mut cursor = body.walk();
371    let stmts: Vec<Node> = body
372        .children(&mut cursor)
373        .filter(|n| n.kind() != "{" && n.kind() != "}" && n.kind() != "comment")
374        .collect();
375    if stmts.len() != 1 {
376        return false;
377    }
378    let stmt = stmts[0];
379    let call = match stmt.kind() {
380        "return_statement" => stmt.child(1).filter(|c| c.kind() == "call_expression"),
381        "expression_statement" => stmt.child(0).filter(|c| c.kind() == "call_expression"),
382        _ => None,
383    };
384    call.and_then(|c| c.child(0))
385        .is_some_and(|f| node_text(f, src).contains('.'))
386}
387
388fn count_comment_lines(node: Node, src: &[u8]) -> usize {
389    let mut count = 0;
390    let mut cursor = node.walk();
391    visit_all(node, &mut cursor, &mut |n| {
392        if n.kind() == "comment" {
393            count += node_text(n, src).lines().count();
394        }
395    });
396    count
397}
398
399fn collect_nil_checks(body: Node, src: &[u8]) -> Vec<String> {
400    let mut fields = Vec::new();
401    let mut cursor = body.walk();
402    visit_all(body, &mut cursor, &mut |n| {
403        if n.kind() != "binary_expression" {
404            return;
405        }
406        let text = node_text(n, src);
407        if !text.contains("nil") {
408            return;
409        }
410        if let Some(left) = n.child(0) {
411            let name = node_text(left, src).to_string();
412            if !fields.contains(&name) {
413                fields.push(name);
414            }
415        }
416    });
417    fields
418}
419
420fn hash_ast(node: Node) -> u64 {
421    let mut hasher = DefaultHasher::new();
422    hash_node(node, &mut hasher);
423    hasher.finish()
424}
425
426fn hash_node(node: Node, hasher: &mut DefaultHasher) {
427    node.kind().hash(hasher);
428    let mut cursor = node.walk();
429    for child in node.children(&mut cursor) {
430        hash_node(child, hasher);
431    }
432}
433
434fn cognitive_complexity_go(node: Node) -> usize {
435    let mut score = 0;
436    cc_walk(node, 0, &mut score);
437    score
438}
439
440fn cc_walk(node: Node, nesting: usize, score: &mut usize) {
441    match node.kind() {
442        "if_statement" => {
443            *score += 1 + nesting;
444            cc_children(node, nesting + 1, score);
445            return;
446        }
447        "for_statement" => {
448            *score += 1 + nesting;
449            cc_children(node, nesting + 1, score);
450            return;
451        }
452        "expression_switch_statement" | "type_switch_statement" | "select_statement" => {
453            *score += 1 + nesting;
454            cc_children(node, nesting + 1, score);
455            return;
456        }
457        "else_clause" => {
458            *score += 1; // no nesting increment for else
459        }
460        "binary_expression" => {
461            if let Some(op) = node.child_by_field_name("operator")
462                && (op.kind() == "&&" || op.kind() == "||")
463            {
464                *score += 1;
465            }
466        }
467        _ => {}
468    }
469    cc_children(node, nesting, score);
470}
471
472fn cc_children(node: Node, nesting: usize, score: &mut usize) {
473    let mut cursor = node.walk();
474    for child in node.children(&mut cursor) {
475        cc_walk(child, nesting, score);
476    }
477}
478
479fn collect_field_refs_go(body: Node, src: &[u8]) -> Vec<String> {
480    let mut refs = Vec::new();
481    let mut cursor = body.walk();
482    visit_all(body, &mut cursor, &mut |n| {
483        if n.kind() == "selector_expression"
484            && let Some(field) = n.child_by_field_name("field")
485        {
486            let name = node_text(field, src).to_string();
487            if !refs.contains(&name) {
488                refs.push(name);
489            }
490        }
491    });
492    refs
493}
494
495fn extract_switch_target_go(body: Node, src: &[u8]) -> Option<String> {
496    let mut target = None;
497    let mut cursor = body.walk();
498    visit_all(body, &mut cursor, &mut |n| {
499        if (n.kind() == "expression_switch_statement" || n.kind() == "type_switch_statement")
500            && target.is_none()
501            && let Some(val) = n.child_by_field_name("value")
502        {
503            target = Some(node_text(val, src).to_string());
504        }
505    });
506    target
507}
508
509fn collect_calls(body: Option<Node>, src: &[u8]) -> Vec<String> {
510    let Some(body) = body else { return Vec::new() };
511    let mut calls = Vec::new();
512    let mut cursor = body.walk();
513    visit_all(body, &mut cursor, &mut |n| {
514        if n.kind() == "call_expression"
515            && let Some(func) = n.child(0)
516        {
517            let name = node_text(func, src).to_string();
518            if !calls.contains(&name) {
519                calls.push(name);
520            }
521        }
522    });
523    calls
524}
525
526fn node_text<'a>(node: Node, src: &'a [u8]) -> &'a str {
527    node.utf8_text(src).unwrap_or("")
528}
529
530fn collect_comments(root: Node, src: &[u8]) -> Vec<cha_core::CommentInfo> {
531    let mut comments = Vec::new();
532    let mut cursor = root.walk();
533    visit_all(root, &mut cursor, &mut |n| {
534        if n.kind().contains("comment") {
535            comments.push(cha_core::CommentInfo {
536                text: node_text(n, src).to_string(),
537                line: n.start_position().row + 1,
538            });
539        }
540    });
541    comments
542}
543
544fn visit_all<F: FnMut(Node)>(node: Node, cursor: &mut tree_sitter::TreeCursor, f: &mut F) {
545    f(node);
546    if cursor.goto_first_child() {
547        loop {
548            let child_node = cursor.node();
549            let mut child_cursor = child_node.walk();
550            visit_all(child_node, &mut child_cursor, f);
551            if !cursor.goto_next_sibling() {
552                break;
553            }
554        }
555        cursor.goto_parent();
556    }
557}