Skip to main content

cgx_engine/parsers/
go.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4    collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5    ParseResult,
6};
7use crate::walker::SourceFile;
8
/// Language parser for Go source files (`.go`).
///
/// Holds the tree-sitter grammar handle that is installed into a fresh
/// `Parser` on every `extract` call and used to compile the extraction queries.
pub struct GoParser {
    /// Tree-sitter grammar for Go, obtained from `tree_sitter_go::language()`.
    language: tree_sitter::Language,
}
12
13impl GoParser {
14    pub fn new() -> Self {
15        Self {
16            language: tree_sitter_go::language(),
17        }
18    }
19}
20
21impl Default for GoParser {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl LanguageParser for GoParser {
28    fn extensions(&self) -> &[&str] {
29        &["go"]
30    }
31
32    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33        let mut parser = Parser::new();
34        parser.set_language(&self.language)?;
35
36        let tree = parser
37            .parse(&file.content, None)
38            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40        let source_bytes = file.content.as_bytes();
41        let root = tree.root_node();
42        let mut nodes = Vec::new();
43        let mut edges = Vec::new();
44
45        let fp = file_node_id(&file.relative_path);
46
47        // Parse function declarations: func name(...) { ... }
48        if let Ok(query) = Query::new(
49            &self.language,
50            "(function_declaration name: (identifier) @name) @fn",
51        ) {
52            extract_nodes(
53                &mut nodes,
54                &mut edges,
55                file,
56                &query,
57                root,
58                source_bytes,
59                NodeKind::Function,
60                "fn",
61                &fp,
62            );
63        }
64
65        // Parse method declarations: func (r *Receiver) Name(...) { ... }
66        if let Ok(query) = Query::new(
67            &self.language,
68            "(method_declaration name: (field_identifier) @name) @fn",
69        ) {
70            extract_nodes(
71                &mut nodes,
72                &mut edges,
73                file,
74                &query,
75                root,
76                source_bytes,
77                NodeKind::Function,
78                "fn",
79                &fp,
80            );
81        }
82
83        // Parse type declarations (struct / interface) as Class nodes
84        if let Ok(query) = Query::new(
85            &self.language,
86            "(type_declaration (type_spec name: (type_identifier) @name)) @cls",
87        ) {
88            extract_nodes(
89                &mut nodes,
90                &mut edges,
91                file,
92                &query,
93                root,
94                source_bytes,
95                NodeKind::Class,
96                "cls",
97                &fp,
98            );
99        }
100
101        // Parse imports
102        extract_imports(&mut edges, root, source_bytes, &fp, file);
103
104        // Extract calls
105        extract_calls(&mut edges, root, source_bytes, file);
106
107        Ok(ParseResult {
108            nodes,
109            edges,
110            ..Default::default()
111        })
112    }
113}
114
/// Builds the graph id of a file node: the literal prefix `file:` followed
/// by the repository-relative path.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::with_capacity("file:".len() + rel_path.len());
    id.push_str("file:");
    id.push_str(rel_path);
    id
}
118
119#[allow(clippy::too_many_arguments)]
120fn extract_nodes(
121    nodes: &mut Vec<NodeDef>,
122    edges: &mut Vec<EdgeDef>,
123    file: &SourceFile,
124    query: &Query,
125    root: tree_sitter::Node,
126    source_bytes: &[u8],
127    kind: NodeKind,
128    prefix: &str,
129    file_id: &str,
130) {
131    let mut cursor = QueryCursor::new();
132    for m in cursor.matches(query, root, source_bytes) {
133        let Some(name_capture) = m
134            .captures
135            .iter()
136            .find(|c| query.capture_names()[c.index as usize] == "name")
137        else {
138            continue;
139        };
140
141        let name = node_text(name_capture.node, source_bytes);
142        let node_start = name_capture.node.start_position();
143
144        let item_node = m
145            .captures
146            .iter()
147            .find(|c| {
148                let cap_name = &query.capture_names()[c.index as usize];
149                *cap_name == "fn" || *cap_name == "cls"
150            })
151            .map(|c| c.node);
152        let body_end = item_node
153            .map(|n| n.end_position())
154            .unwrap_or_else(|| name_capture.node.end_position());
155
156        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
157
158        // Go: exported = first letter uppercase. Doc comments are `// ...` lines above.
159        let exported = name
160            .chars()
161            .next()
162            .map(|c| c.is_uppercase())
163            .unwrap_or(false);
164        let doc_comment = item_node
165            .and_then(|n| collect_doc_block_above(n, source_bytes, is_go_doc_comment))
166            .map(strip_go_doc_markers);
167
168        let mut def = NodeDef {
169            id: id.clone(),
170            kind: kind.clone(),
171            name: name.clone(),
172            path: file.relative_path.clone(),
173            line_start: node_start.row as u32 + 1,
174            line_end: body_end.row as u32 + 1,
175            ..Default::default()
176        };
177        if exported {
178            meta_set(&mut def, "exported", serde_json::Value::Bool(true));
179        }
180        if let Some(doc) = doc_comment {
181            meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
182        }
183        nodes.push(def);
184
185        edges.push(EdgeDef {
186            src: file_id.to_string(),
187            dst: id,
188            kind: EdgeKind::Exports,
189            ..Default::default()
190        });
191    }
192}
193
/// Returns `true` when `text`, ignoring leading whitespace, opens a Go
/// comment: either a line comment (`//`) or a block comment (`/*`).
///
/// Go conventionally documents with `// SymbolName ...` lines directly above the decl.
fn is_go_doc_comment(text: &str) -> bool {
    let body = text.trim_start();
    ["//", "/*"].iter().any(|marker| body.starts_with(marker))
}
199
/// Removes Go comment markers from a raw doc-comment block and returns the
/// plain text, one output line per input line, with the overall result trimmed.
///
/// Per line (after trimming surrounding whitespace):
/// - `// text`            -> `text`
/// - `/* text */`         -> `text` (opening line of a block comment)
/// - `* text`, `text`     -> `text` (continuation line of a block comment)
/// - `*/`                 -> empty line
///
/// A closing `*/` at the end of a continuation line (`text */`, `* text */`)
/// is removed as well, so the terminator never leaks into the documentation.
fn strip_go_doc_markers(raw: String) -> String {
    let cleaned: Vec<&str> = raw
        .lines()
        .map(|line| {
            let l = line.trim();
            if let Some(rest) = l.strip_prefix("//") {
                // Line comment: everything after the marker is text, even a literal `*/`.
                rest.trim()
            } else if l.starts_with("/*") {
                l.trim_start_matches("/*").trim_end_matches("*/").trim()
            } else {
                // Continuation line: drop a closing terminator first, then the
                // optional leading `*` gutter.
                let body = l.strip_suffix("*/").unwrap_or(l).trim_end();
                body.strip_prefix('*').unwrap_or(body).trim()
            }
        })
        .collect();
    cleaned.join("\n").trim().to_string()
}
222
223fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
224    node.utf8_text(source).unwrap_or("").to_string()
225}
226
227fn extract_imports(
228    edges: &mut Vec<EdgeDef>,
229    root: tree_sitter::Node,
230    source_bytes: &[u8],
231    file_id: &str,
232    file: &SourceFile,
233) {
234    let mut cursor = root.walk();
235    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
236}
237
238fn traverse_imports(
239    edges: &mut Vec<EdgeDef>,
240    node: tree_sitter::Node,
241    source_bytes: &[u8],
242    file_id: &str,
243    file: &SourceFile,
244    cursor: &mut tree_sitter::TreeCursor,
245) {
246    if node.kind() == "import_declaration" {
247        // Go imports: import "path" or import ( "path1" "path2" )
248        for j in 0..node.child_count() {
249            let Some(import_child) = node.child(j) else {
250                continue;
251            };
252            if import_child.kind() == "import_spec" {
253                // import_spec has a path child
254                for k in 0..import_child.child_count() {
255                    let Some(spec_child) = import_child.child(k) else {
256                        continue;
257                    };
258                    if spec_child.kind() == "interpreted_string_literal"
259                        || spec_child.kind() == "raw_string_literal"
260                    {
261                        let import_path = unquote_str(&source_bytes[spec_child.byte_range()]);
262                        // Only resolve relative imports (same module)
263                        // Go module imports are usually remote; we skip them for local graph
264                        if import_path.starts_with('.') {
265                            let resolved = resolve_import_path(&file.relative_path, &import_path);
266                            if !resolved.is_empty() {
267                                edges.push(EdgeDef {
268                                    src: file_id.to_string(),
269                                    dst: file_node_id(&resolved),
270                                    kind: EdgeKind::Imports,
271                                    ..Default::default()
272                                });
273                            }
274                        }
275                    }
276                }
277            } else if import_child.kind() == "interpreted_string_literal"
278                || import_child.kind() == "raw_string_literal"
279            {
280                // Single import: import "path"
281                let import_path = unquote_str(&source_bytes[import_child.byte_range()]);
282                if import_path.starts_with('.') {
283                    let resolved = resolve_import_path(&file.relative_path, &import_path);
284                    if !resolved.is_empty() {
285                        edges.push(EdgeDef {
286                            src: file_id.to_string(),
287                            dst: file_node_id(&resolved),
288                            kind: EdgeKind::Imports,
289                            ..Default::default()
290                        });
291                    }
292                }
293            }
294        }
295    }
296
297    if cursor.goto_first_child() {
298        loop {
299            let child = cursor.node();
300            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
301            if !cursor.goto_next_sibling() {
302                break;
303            }
304        }
305        cursor.goto_parent();
306    }
307}
308
/// Decodes `s` as UTF-8 and strips surrounding whitespace followed by any
/// surrounding single quotes, double quotes and backticks (in that order).
/// Returns an empty string when `s` is not valid UTF-8.
fn unquote_str(s: &[u8]) -> String {
    let Ok(text) = std::str::from_utf8(s) else {
        return String::new();
    };
    let mut inner = text.trim();
    for quote in ['\'', '"', '`'] {
        inner = inner.trim_matches(quote);
    }
    inner.to_owned()
}
317
/// Resolves the relative import path `import` against the directory of the
/// file `current` (both `/`-separated) and returns the normalized path.
///
/// `.` segments are ignored and `..` removes the last directory (it is a
/// no-op once the path is already empty). Empty segments, produced by a
/// trailing or doubled slash such as `./pkg/` or `.//pkg`, are ignored too,
/// so the result never contains `//` or a trailing `/` contributed by `import`.
fn resolve_import_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // drop the file name, keeping only its directory

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
334
335fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
336    let mut fn_stack: Vec<String> = Vec::new();
337    walk_for_calls(edges, root, source, file, &mut fn_stack);
338}
339
/// Returns `true` for the node kinds that open a new function scope while
/// walking for calls: function declarations, method declarations and
/// function literals.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = ["function_declaration", "method_declaration", "func_literal"];
    FN_KINDS.contains(&kind)
}
346
347fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
348    // function_declaration has `name` field (identifier)
349    // method_declaration has `name` field (field_identifier)
350    if let Some(name_node) = node.child_by_field_name("name") {
351        let name = name_node.utf8_text(source).unwrap_or("").to_string();
352        if !name.is_empty() {
353            return Some(format!("fn:{}:{}", file.relative_path, name));
354        }
355    }
356    None
357}
358
359fn walk_for_calls(
360    edges: &mut Vec<EdgeDef>,
361    node: Node,
362    source: &[u8],
363    file: &SourceFile,
364    fn_stack: &mut Vec<String>,
365) {
366    let kind = node.kind();
367    let pushed = is_fn_node(kind);
368
369    if pushed {
370        if let Some(id) = fn_name_from_node(node, source, file) {
371            fn_stack.push(id);
372        } else {
373            fn_stack.push(String::new());
374        }
375    }
376
377    if kind == "call_expression" {
378        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
379            let callee_name = node
380                .child_by_field_name("function")
381                .and_then(|func| match func.kind() {
382                    "identifier" => Some(func.utf8_text(source).unwrap_or("").to_string()),
383                    "selector_expression" => func
384                        .child_by_field_name("field")
385                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
386                    _ => None,
387                })
388                .unwrap_or_default();
389
390            if !callee_name.is_empty() {
391                edges.push(EdgeDef {
392                    src: caller_id.clone(),
393                    dst: callee_name,
394                    kind: EdgeKind::Calls,
395                    confidence: 0.7,
396                    ..Default::default()
397                });
398            }
399        }
400    }
401
402    let mut cursor = node.walk();
403    if cursor.goto_first_child() {
404        loop {
405            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
406            if !cursor.goto_next_sibling() {
407                break;
408            }
409        }
410    }
411
412    if pushed {
413        fn_stack.pop();
414    }
415}