Skip to main content

agentic_codebase/parse/
rust.rs

1//! Rust-specific parsing using tree-sitter.
2//!
3//! Extracts functions, structs, enums, traits, impls, mods, use declarations, macros.
4
5use std::path::Path;
6
7use crate::types::{AcbResult, CodeUnitType, Language, Visibility};
8
9use super::treesitter::{count_complexity, get_node_text, node_to_span};
10use super::{LanguageParser, RawCodeUnit, RawReference, ReferenceKind};
11
/// Rust language parser.
///
/// A stateless unit type: all parsing state is passed to the extraction
/// methods explicitly, so values are trivially copyable.
#[derive(Debug, Clone, Copy)]
pub struct RustParser;
14
15impl Default for RustParser {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl RustParser {
22    /// Create a new Rust parser.
23    pub fn new() -> Self {
24        Self
25    }
26
27    fn extract_from_node(
28        &self,
29        node: tree_sitter::Node,
30        source: &str,
31        file_path: &Path,
32        units: &mut Vec<RawCodeUnit>,
33        next_id: &mut u64,
34        parent_qname: &str,
35    ) {
36        let mut cursor = node.walk();
37        for child in node.children(&mut cursor) {
38            match child.kind() {
39                "function_item" => {
40                    if let Some(unit) =
41                        self.extract_function(child, source, file_path, parent_qname, next_id)
42                    {
43                        units.push(unit);
44                    }
45                }
46                "struct_item" => {
47                    if let Some(unit) = self.extract_type_def(
48                        child,
49                        source,
50                        file_path,
51                        CodeUnitType::Type,
52                        parent_qname,
53                        next_id,
54                    ) {
55                        units.push(unit);
56                    }
57                }
58                "enum_item" => {
59                    if let Some(unit) = self.extract_type_def(
60                        child,
61                        source,
62                        file_path,
63                        CodeUnitType::Type,
64                        parent_qname,
65                        next_id,
66                    ) {
67                        units.push(unit);
68                    }
69                }
70                "trait_item" => {
71                    if let Some(unit) = self.extract_type_def(
72                        child,
73                        source,
74                        file_path,
75                        CodeUnitType::Trait,
76                        parent_qname,
77                        next_id,
78                    ) {
79                        let qname = unit.qualified_name.clone();
80                        units.push(unit);
81                        if let Some(body) = child.child_by_field_name("body") {
82                            self.extract_from_node(body, source, file_path, units, next_id, &qname);
83                        }
84                    }
85                }
86                "impl_item" => {
87                    if let Some(unit) =
88                        self.extract_impl(child, source, file_path, parent_qname, next_id)
89                    {
90                        let qname = unit.qualified_name.clone();
91                        units.push(unit);
92                        if let Some(body) = child.child_by_field_name("body") {
93                            self.extract_from_node(body, source, file_path, units, next_id, &qname);
94                        }
95                    }
96                }
97                "mod_item" => {
98                    if let Some(unit) =
99                        self.extract_mod(child, source, file_path, parent_qname, next_id)
100                    {
101                        let qname = unit.qualified_name.clone();
102                        units.push(unit);
103                        if let Some(body) = child.child_by_field_name("body") {
104                            self.extract_from_node(body, source, file_path, units, next_id, &qname);
105                        }
106                    }
107                }
108                "use_declaration" => {
109                    if let Some(unit) =
110                        self.extract_use(child, source, file_path, parent_qname, next_id)
111                    {
112                        units.push(unit);
113                    }
114                }
115                "macro_definition" => {
116                    if let Some(unit) =
117                        self.extract_macro(child, source, file_path, parent_qname, next_id)
118                    {
119                        units.push(unit);
120                    }
121                }
122                _ => {}
123            }
124        }
125    }
126
127    fn extract_function(
128        &self,
129        node: tree_sitter::Node,
130        source: &str,
131        file_path: &Path,
132        parent_qname: &str,
133        next_id: &mut u64,
134    ) -> Option<RawCodeUnit> {
135        let name_node = node.child_by_field_name("name")?;
136        let name = get_node_text(name_node, source).to_string();
137        let qname = make_qname(parent_qname, &name);
138        let span = node_to_span(node);
139
140        // Signature
141        let sig = node.child_by_field_name("parameters").map(|params| {
142            let params_text = get_node_text(params, source);
143            let ret = node
144                .child_by_field_name("return_type")
145                .map(|r| format!(" -> {}", get_node_text(r, source)))
146                .unwrap_or_default();
147            format!("fn {}{}{}", name, params_text, ret)
148        });
149
150        let vis = rust_visibility(node, source);
151        let fn_text = &source[node.byte_range()];
152        let is_async = fn_text.contains("async fn ") || fn_text.trim_start().starts_with("async ");
153
154        let complexity_kinds = &[
155            "if_expression",
156            "else_clause",
157            "for_expression",
158            "while_expression",
159            "loop_expression",
160            "match_arm",
161            "binary_expression",
162        ];
163        let complexity = count_complexity(node, complexity_kinds);
164
165        let doc = extract_rust_doc(node, source);
166
167        let is_test = name.starts_with("test_") || source[node.byte_range()].contains("#[test]");
168
169        let unit_type = if is_test {
170            CodeUnitType::Test
171        } else {
172            CodeUnitType::Function
173        };
174
175        let id = *next_id;
176        *next_id += 1;
177
178        let mut unit = RawCodeUnit::new(
179            unit_type,
180            Language::Rust,
181            name,
182            file_path.to_path_buf(),
183            span,
184        );
185        unit.temp_id = id;
186        unit.qualified_name = qname;
187        unit.signature = sig;
188        unit.doc = doc;
189        unit.visibility = vis;
190        unit.is_async = is_async;
191        unit.complexity = complexity;
192
193        Some(unit)
194    }
195
196    fn extract_type_def(
197        &self,
198        node: tree_sitter::Node,
199        source: &str,
200        file_path: &Path,
201        unit_type: CodeUnitType,
202        parent_qname: &str,
203        next_id: &mut u64,
204    ) -> Option<RawCodeUnit> {
205        let name_node = node.child_by_field_name("name")?;
206        let name = get_node_text(name_node, source).to_string();
207        let qname = make_qname(parent_qname, &name);
208        let span = node_to_span(node);
209        let vis = rust_visibility(node, source);
210        let doc = extract_rust_doc(node, source);
211
212        let id = *next_id;
213        *next_id += 1;
214
215        let mut unit = RawCodeUnit::new(
216            unit_type,
217            Language::Rust,
218            name,
219            file_path.to_path_buf(),
220            span,
221        );
222        unit.temp_id = id;
223        unit.qualified_name = qname;
224        unit.doc = doc;
225        unit.visibility = vis;
226
227        Some(unit)
228    }
229
230    fn extract_impl(
231        &self,
232        node: tree_sitter::Node,
233        source: &str,
234        file_path: &Path,
235        parent_qname: &str,
236        next_id: &mut u64,
237    ) -> Option<RawCodeUnit> {
238        let type_node = node.child_by_field_name("type")?;
239        let type_name = get_node_text(type_node, source).to_string();
240
241        let trait_name = node
242            .child_by_field_name("trait")
243            .map(|t| get_node_text(t, source).to_string());
244
245        let name = if let Some(ref tr) = trait_name {
246            format!("impl {} for {}", tr, type_name)
247        } else {
248            format!("impl {}", type_name)
249        };
250
251        let qname = make_qname(parent_qname, &name);
252        let span = node_to_span(node);
253
254        let id = *next_id;
255        *next_id += 1;
256
257        let mut unit = RawCodeUnit::new(
258            CodeUnitType::Impl,
259            Language::Rust,
260            name,
261            file_path.to_path_buf(),
262            span,
263        );
264        unit.temp_id = id;
265        unit.qualified_name = qname;
266
267        if let Some(tr) = trait_name {
268            unit.references.push(RawReference {
269                name: tr,
270                kind: ReferenceKind::Implement,
271                span: node_to_span(type_node),
272            });
273        }
274
275        Some(unit)
276    }
277
278    fn extract_mod(
279        &self,
280        node: tree_sitter::Node,
281        source: &str,
282        file_path: &Path,
283        parent_qname: &str,
284        next_id: &mut u64,
285    ) -> Option<RawCodeUnit> {
286        let name_node = node.child_by_field_name("name")?;
287        let name = get_node_text(name_node, source).to_string();
288        let qname = make_qname(parent_qname, &name);
289        let span = node_to_span(node);
290        let vis = rust_visibility(node, source);
291
292        let id = *next_id;
293        *next_id += 1;
294
295        let mut unit = RawCodeUnit::new(
296            CodeUnitType::Module,
297            Language::Rust,
298            name,
299            file_path.to_path_buf(),
300            span,
301        );
302        unit.temp_id = id;
303        unit.qualified_name = qname;
304        unit.visibility = vis;
305
306        Some(unit)
307    }
308
309    fn extract_use(
310        &self,
311        node: tree_sitter::Node,
312        source: &str,
313        file_path: &Path,
314        parent_qname: &str,
315        next_id: &mut u64,
316    ) -> Option<RawCodeUnit> {
317        let text = get_node_text(node, source).to_string();
318        let span = node_to_span(node);
319        let import_name = text
320            .trim_start_matches("use ")
321            .trim_end_matches(';')
322            .trim()
323            .to_string();
324
325        let id = *next_id;
326        *next_id += 1;
327
328        let mut unit = RawCodeUnit::new(
329            CodeUnitType::Import,
330            Language::Rust,
331            import_name.clone(),
332            file_path.to_path_buf(),
333            span,
334        );
335        unit.temp_id = id;
336        unit.qualified_name = make_qname(parent_qname, &import_name);
337        unit.references.push(RawReference {
338            name: import_name,
339            kind: ReferenceKind::Import,
340            span,
341        });
342
343        Some(unit)
344    }
345
346    fn extract_macro(
347        &self,
348        node: tree_sitter::Node,
349        source: &str,
350        file_path: &Path,
351        parent_qname: &str,
352        next_id: &mut u64,
353    ) -> Option<RawCodeUnit> {
354        let name_node = node.child_by_field_name("name")?;
355        let name = get_node_text(name_node, source).to_string();
356        let qname = make_qname(parent_qname, &name);
357        let span = node_to_span(node);
358
359        let id = *next_id;
360        *next_id += 1;
361
362        let mut unit = RawCodeUnit::new(
363            CodeUnitType::Macro,
364            Language::Rust,
365            name,
366            file_path.to_path_buf(),
367            span,
368        );
369        unit.temp_id = id;
370        unit.qualified_name = qname;
371        unit.visibility = rust_visibility(node, source);
372
373        Some(unit)
374    }
375}
376
377impl LanguageParser for RustParser {
378    fn extract_units(
379        &self,
380        tree: &tree_sitter::Tree,
381        source: &str,
382        file_path: &Path,
383    ) -> AcbResult<Vec<RawCodeUnit>> {
384        let mut units = Vec::new();
385        let mut next_id = 0u64;
386
387        let module_name = file_path
388            .file_stem()
389            .and_then(|s| s.to_str())
390            .unwrap_or("unknown")
391            .to_string();
392
393        let root_span = node_to_span(tree.root_node());
394        let mut module_unit = RawCodeUnit::new(
395            CodeUnitType::Module,
396            Language::Rust,
397            module_name.clone(),
398            file_path.to_path_buf(),
399            root_span,
400        );
401        module_unit.temp_id = next_id;
402        module_unit.qualified_name = module_name.clone();
403        next_id += 1;
404        units.push(module_unit);
405
406        self.extract_from_node(
407            tree.root_node(),
408            source,
409            file_path,
410            &mut units,
411            &mut next_id,
412            &module_name,
413        );
414
415        Ok(units)
416    }
417
418    fn is_test_file(&self, path: &Path, source: &str) -> bool {
419        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
420        path.components().any(|c| c.as_os_str() == "tests")
421            || name.ends_with("_test.rs")
422            || source.contains("#[cfg(test)]")
423            || source.contains("#[test]")
424    }
425}
426
/// Joins `parent` and `name` with `::`; an empty `parent` yields `name` unchanged.
fn make_qname(parent: &str, name: &str) -> String {
    match parent {
        "" => name.to_owned(),
        scope => [scope, name].join("::"),
    }
}
434
435fn rust_visibility(node: tree_sitter::Node, source: &str) -> Visibility {
436    let text = get_node_text(node, source);
437    if text.starts_with("pub(crate)") {
438        Visibility::Internal
439    } else if text.starts_with("pub(super)") {
440        Visibility::Protected
441    } else if text.starts_with("pub ") || text.starts_with("pub(") {
442        Visibility::Public
443    } else {
444        Visibility::Private
445    }
446}
447
448fn extract_rust_doc(node: tree_sitter::Node, source: &str) -> Option<String> {
449    let mut prev = node.prev_sibling();
450    while let Some(p) = prev {
451        if p.kind() == "line_comment" {
452            let text = get_node_text(p, source);
453            if let Some(stripped) = text.strip_prefix("///") {
454                return Some(stripped.trim().to_string());
455            }
456            if let Some(stripped) = text.strip_prefix("//!") {
457                return Some(stripped.trim().to_string());
458            }
459        } else {
460            break;
461        }
462        prev = p.prev_sibling();
463    }
464    None
465}