Skip to main content

cgx_engine/parsers/
rust.rs

1use tree_sitter::{Parser, Query, QueryCursor};
2
3use crate::parser::{
4    collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5    ParseResult,
6};
7use crate::walker::SourceFile;
8
/// Language parser for Rust source files, backed by the tree-sitter Rust grammar.
pub struct RustParser {
    /// Grammar handle; installed on each `Parser` and used to compile the queries in `extract`.
    language: tree_sitter::Language,
}
12
13impl RustParser {
14    pub fn new() -> Self {
15        Self {
16            language: tree_sitter_rust::language(),
17        }
18    }
19}
20
21impl Default for RustParser {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl LanguageParser for RustParser {
28    fn extensions(&self) -> &[&str] {
29        &["rs"]
30    }
31
32    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33        let mut parser = Parser::new();
34        parser.set_language(&self.language)?;
35
36        let tree = parser
37            .parse(&file.content, None)
38            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40        let source_bytes = file.content.as_bytes();
41        let root = tree.root_node();
42        let mut nodes = Vec::new();
43        let mut edges = Vec::new();
44
45        let fp = format!("file:{}", file.relative_path);
46
47        // Function definitions
48        if let Ok(query) = Query::new(
49            &self.language,
50            "(function_item name: (identifier) @name) @fn",
51        ) {
52            let mut cursor = QueryCursor::new();
53            for m in cursor.matches(&query, root, source_bytes) {
54                let Some(name_capture) = m
55                    .captures
56                    .iter()
57                    .find(|c| query.capture_names()[c.index as usize] == "name")
58                else {
59                    continue;
60                };
61                let fn_node = m
62                    .captures
63                    .iter()
64                    .find(|c| query.capture_names()[c.index as usize] == "fn")
65                    .map(|c| c.node);
66                let name = node_text(name_capture.node, source_bytes);
67                let start = name_capture.node.start_position();
68                let body_end = fn_node
69                    .map(|n| n.end_position())
70                    .unwrap_or_else(|| name_capture.node.end_position());
71                let id = format!("fn:{}:{}", file.relative_path, name);
72
73                let doc_comment = fn_node
74                    .and_then(|n| collect_doc_block_above(n, source_bytes, is_rust_doc_comment))
75                    .map(strip_rust_doc_markers);
76
77                let mut def = NodeDef {
78                    id: id.clone(),
79                    kind: NodeKind::Function,
80                    name,
81                    path: file.relative_path.clone(),
82                    line_start: start.row as u32 + 1,
83                    line_end: body_end.row as u32 + 1,
84                    ..Default::default()
85                };
86                if let Some(doc) = doc_comment {
87                    meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
88                }
89                nodes.push(def);
90
91                edges.push(EdgeDef {
92                    src: fp.clone(),
93                    dst: id,
94                    kind: EdgeKind::Exports,
95                    ..Default::default()
96                });
97            }
98        }
99
100        // Struct definitions
101        if let Ok(query) = Query::new(
102            &self.language,
103            "(struct_item name: (type_identifier) @name) @s",
104        ) {
105            extract_type_nodes(
106                &mut nodes,
107                &mut edges,
108                &fp,
109                file,
110                &query,
111                root,
112                source_bytes,
113                NodeKind::Class,
114                "cls",
115            );
116        }
117
118        // Enum definitions
119        if let Ok(query) = Query::new(
120            &self.language,
121            "(enum_item name: (type_identifier) @name) @e",
122        ) {
123            extract_type_nodes(
124                &mut nodes,
125                &mut edges,
126                &fp,
127                file,
128                &query,
129                root,
130                source_bytes,
131                NodeKind::Class,
132                "cls",
133            );
134        }
135
136        // Trait definitions
137        if let Ok(query) = Query::new(
138            &self.language,
139            "(trait_item name: (type_identifier) @name) @t",
140        ) {
141            extract_type_nodes(
142                &mut nodes,
143                &mut edges,
144                &fp,
145                file,
146                &query,
147                root,
148                source_bytes,
149                NodeKind::Class,
150                "cls",
151            );
152        }
153
154        // Impl blocks — add edges for impl'd struct/trait methods
155        if let Ok(query) = Query::new(
156            &self.language,
157            "(impl_item type: (type_identifier) @type body: (_) @body)",
158        ) {
159            let mut cursor = QueryCursor::new();
160            for m in cursor.matches(&query, root, source_bytes) {
161                if let Some(type_cap) = m
162                    .captures
163                    .iter()
164                    .find(|c| query.capture_names()[c.index as usize] == "type")
165                {
166                    let type_name = node_text(type_cap.node, source_bytes);
167                    edges.push(EdgeDef {
168                        src: fp.clone(),
169                        dst: format!("cls:{}:{}", file.relative_path, type_name),
170                        kind: EdgeKind::Exports,
171                        ..Default::default()
172                    });
173                }
174            }
175        }
176
177        // Use statements
178        if let Ok(query) = Query::new(
179            &self.language,
180            "(use_declaration argument: (scoped_identifier path: (_) @path name: (_)?))",
181        ) {
182            let mut cursor = QueryCursor::new();
183            for m in cursor.matches(&query, root, source_bytes) {
184                if let Some(path_cap) = m
185                    .captures
186                    .iter()
187                    .find(|c| query.capture_names()[c.index as usize] == "path")
188                {
189                    let full_path = node_text(path_cap.node, source_bytes);
190                    // Simple case: use crate::foo::bar -> file path is src/foo/bar.rs
191                    let import_path = if full_path.starts_with("crate::") {
192                        format!(
193                            "src/{}.rs",
194                            full_path.trim_start_matches("crate::").replace("::", "/")
195                        )
196                    } else {
197                        continue;
198                    };
199                    edges.push(EdgeDef {
200                        src: fp.clone(),
201                        dst: format!("file:{}", import_path),
202                        kind: EdgeKind::Imports,
203                        ..Default::default()
204                    });
205                }
206            }
207        }
208
209        // Simpler use declarations (use foo::Bar)
210        if let Ok(query) = Query::new(
211            &self.language,
212            "(use_declaration argument: (identifier) @name)",
213        ) {
214            let mut cursor = QueryCursor::new();
215            for m in cursor.matches(&query, root, source_bytes) {
216                if let Some(name_cap) = m
217                    .captures
218                    .iter()
219                    .find(|c| query.capture_names()[c.index as usize] == "name")
220                {
221                    let mod_name = node_text(name_cap.node, source_bytes);
222                    let import_path = mod_name;
223                    edges.push(EdgeDef {
224                        src: fp.clone(),
225                        dst: format!("file:{}.rs", import_path),
226                        kind: EdgeKind::Imports,
227                        ..Default::default()
228                    });
229                }
230            }
231        }
232
233        // Mark pub items as exported
234        mark_pub_exported(&mut nodes, root, source_bytes);
235
236        Ok(ParseResult {
237            nodes,
238            edges,
239            ..Default::default()
240        })
241    }
242}
243
244fn is_pub_item(node: tree_sitter::Node, source_bytes: &[u8]) -> bool {
245    for i in 0..node.child_count() {
246        if let Some(child) = node.child(i) {
247            if child.kind() == "visibility_modifier" {
248                let text = node_text(child, source_bytes);
249                if text == "pub" || text.starts_with("pub(") {
250                    return true;
251                }
252            }
253        }
254    }
255    false
256}
257
258fn mark_pub_exported(
259    nodes: &mut Vec<crate::parser::NodeDef>,
260    root: tree_sitter::Node,
261    source_bytes: &[u8],
262) {
263    walk_pub(nodes, root, source_bytes);
264}
265
266fn walk_pub(nodes: &mut Vec<crate::parser::NodeDef>, node: tree_sitter::Node, source_bytes: &[u8]) {
267    let kind = node.kind();
268    if matches!(
269        kind,
270        "function_item" | "struct_item" | "enum_item" | "trait_item" | "type_item"
271    ) && is_pub_item(node, source_bytes)
272    {
273        // Get the name of this item
274        if let Some(name_node) = node.child_by_field_name("name") {
275            let item_name = node_text(name_node, source_bytes);
276            // Mark the matching node as exported (preserve any existing metadata, e.g. doc_comment).
277            for n in nodes.iter_mut() {
278                if n.name == item_name {
279                    meta_set(n, "exported", serde_json::Value::Bool(true));
280                }
281            }
282        }
283    }
284
285    let mut cursor = node.walk();
286    if cursor.goto_first_child() {
287        loop {
288            walk_pub(nodes, cursor.node(), source_bytes);
289            if !cursor.goto_next_sibling() {
290                break;
291            }
292        }
293    }
294}
295
/// True if `text` (ignoring leading whitespace) is a Rust doc comment: `///`, `//!`,
/// or `/** ... */`.
///
/// Comments that only look similar are rejected, following the Rust Reference:
/// a line starting with four or more slashes (`////`), a block starting with `/***`,
/// and the empty block `/**/` are ordinary comments, not documentation.
fn is_rust_doc_comment(text: &str) -> bool {
    let t = text.trim_start();

    let line_doc = (t.starts_with("///") && !t.starts_with("////")) || t.starts_with("//!");
    let block_doc = t.starts_with("/**") && !t.starts_with("/***") && !t.starts_with("/**/");

    line_doc || block_doc
}
301
/// Strips Rust doc-comment markers from `raw` and returns the remaining text.
///
/// Each line is trimmed and then handled by its shape:
/// - `///` and `//!` lines lose the marker,
/// - a `/** ...` opening line loses the opener (and a closing `*/` on the same line),
/// - a line starting with `*/` becomes empty,
/// - any other line is treated as block-comment interior: a trailing `*/` is removed,
///   then one leading `*`.
///
/// The cleaned lines are joined with `\n` and the result is trimmed, so blank lines
/// produced by a bare `/**` or `*/` at either end disappear.
fn strip_rust_doc_markers(raw: String) -> String {
    let cleaned: Vec<&str> = raw
        .lines()
        .map(|line| {
            let l = line.trim();
            if let Some(rest) = l.strip_prefix("///").or_else(|| l.strip_prefix("//!")) {
                rest.trim()
            } else if l.starts_with("/**") {
                l.trim_start_matches("/**").trim_end_matches("*/").trim()
            } else if l.starts_with("*/") {
                ""
            } else {
                // Interior line of a block comment. The comment may close on a content
                // line (`* last words */`), so drop the terminator before the leading `*`.
                let body = l.strip_suffix("*/").unwrap_or(l).trim_end();
                body.strip_prefix('*').unwrap_or(body).trim()
            }
        })
        .collect();

    cleaned.join("\n").trim().to_string()
}
327
328#[allow(clippy::too_many_arguments)]
329fn extract_type_nodes(
330    nodes: &mut Vec<NodeDef>,
331    edges: &mut Vec<EdgeDef>,
332    file_id: &str,
333    file: &SourceFile,
334    query: &Query,
335    root: tree_sitter::Node,
336    source_bytes: &[u8],
337    kind: NodeKind,
338    prefix: &str,
339) {
340    let mut cursor = QueryCursor::new();
341    for m in cursor.matches(query, root, source_bytes) {
342        let Some(name_capture) = m
343            .captures
344            .iter()
345            .find(|c| query.capture_names()[c.index as usize] == "name")
346        else {
347            continue;
348        };
349        let name = node_text(name_capture.node, source_bytes);
350        let start = name_capture.node.start_position();
351        // Use the body/item node for both end position and doc-comment lookup.
352        let item_node = m
353            .captures
354            .iter()
355            .find(|c| query.capture_names()[c.index as usize] != "name")
356            .map(|c| c.node);
357        let body_end = item_node
358            .map(|n| n.end_position())
359            .unwrap_or_else(|| name_capture.node.end_position());
360        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
361
362        let doc_comment = item_node
363            .and_then(|n| collect_doc_block_above(n, source_bytes, is_rust_doc_comment))
364            .map(strip_rust_doc_markers);
365
366        let mut def = NodeDef {
367            id: id.clone(),
368            kind: kind.clone(),
369            name,
370            path: file.relative_path.clone(),
371            line_start: start.row as u32 + 1,
372            line_end: body_end.row as u32 + 1,
373            ..Default::default()
374        };
375        if let Some(doc) = doc_comment {
376            meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
377        }
378        nodes.push(def);
379
380        edges.push(EdgeDef {
381            src: file_id.to_string(),
382            dst: id,
383            kind: EdgeKind::Exports,
384            ..Default::default()
385        });
386    }
387}
388
389fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
390    node.utf8_text(source).unwrap_or("").to_string()
391}