Skip to main content

agentic_codebase/parse/
cpp.rs

1//! C++ parsing using tree-sitter.
2//!
3//! Extracts functions, classes, structs, namespaces, enums, templates, includes.
4
5use std::path::Path;
6
7use crate::types::{AcbResult, CodeUnitType, Language, Visibility};
8
9use super::treesitter::{get_node_text, node_to_span};
10use super::{LanguageParser, RawCodeUnit};
11
12/// C++ language parser.
13pub struct CppParser;
14
15impl Default for CppParser {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl CppParser {
22    /// Create a new C++ parser.
23    pub fn new() -> Self {
24        Self
25    }
26
27    fn extract_from_node(
28        &self,
29        node: tree_sitter::Node,
30        source: &str,
31        file_path: &Path,
32        units: &mut Vec<RawCodeUnit>,
33        next_id: &mut u64,
34        parent_qname: &str,
35    ) {
36        let mut cursor = node.walk();
37        for child in node.children(&mut cursor) {
38            match child.kind() {
39                "function_definition" => {
40                    if let Some(unit) =
41                        self.extract_function(child, source, file_path, parent_qname, next_id)
42                    {
43                        units.push(unit);
44                    }
45                }
46                "declaration" => {
47                    // A declaration can contain a function declarator (forward decl / prototype)
48                    // or a variable. We extract function prototypes.
49                    if let Some(unit) =
50                        self.extract_declaration(child, source, file_path, parent_qname, next_id)
51                    {
52                        units.push(unit);
53                    }
54                }
55                "class_specifier" => {
56                    self.extract_class_or_struct(
57                        child,
58                        source,
59                        file_path,
60                        units,
61                        next_id,
62                        parent_qname,
63                    );
64                }
65                "struct_specifier" => {
66                    self.extract_class_or_struct(
67                        child,
68                        source,
69                        file_path,
70                        units,
71                        next_id,
72                        parent_qname,
73                    );
74                }
75                "namespace_definition" => {
76                    self.extract_namespace(child, source, file_path, units, next_id, parent_qname);
77                }
78                "enum_specifier" => {
79                    if let Some(unit) =
80                        self.extract_enum(child, source, file_path, parent_qname, next_id)
81                    {
82                        units.push(unit);
83                    }
84                }
85                "template_declaration" => {
86                    // Recurse into the template body to find the actual declaration
87                    self.extract_from_node(child, source, file_path, units, next_id, parent_qname);
88                }
89                "preproc_include" => {
90                    if let Some(unit) =
91                        self.extract_include(child, source, file_path, parent_qname, next_id)
92                    {
93                        units.push(unit);
94                    }
95                }
96                // Top-level declarations wrapped in linkage_specification (extern "C" { ... })
97                "linkage_specification" => {
98                    self.extract_from_node(child, source, file_path, units, next_id, parent_qname);
99                }
100                _ => {}
101            }
102        }
103    }
104
105    fn extract_function(
106        &self,
107        node: tree_sitter::Node,
108        source: &str,
109        file_path: &Path,
110        parent_qname: &str,
111        next_id: &mut u64,
112    ) -> Option<RawCodeUnit> {
113        let name = self.function_name(node, source)?;
114        let qname = cpp_qname(parent_qname, &name);
115        let span = node_to_span(node);
116
117        let id = *next_id;
118        *next_id += 1;
119
120        let is_test = name.starts_with("TEST") || name.starts_with("test_");
121        let unit_type = if is_test {
122            CodeUnitType::Test
123        } else {
124            CodeUnitType::Function
125        };
126
127        let mut unit = RawCodeUnit::new(
128            unit_type,
129            Language::Cpp,
130            name,
131            file_path.to_path_buf(),
132            span,
133        );
134        unit.temp_id = id;
135        unit.qualified_name = qname;
136        unit.visibility = Visibility::Public;
137
138        Some(unit)
139    }
140
141    /// Extract a function prototype from a top-level declaration node.
142    fn extract_declaration(
143        &self,
144        node: tree_sitter::Node,
145        source: &str,
146        file_path: &Path,
147        parent_qname: &str,
148        next_id: &mut u64,
149    ) -> Option<RawCodeUnit> {
150        // Only extract if the declaration contains a function_declarator
151        let declarator = find_descendant_by_kind(node, "function_declarator")?;
152        let name_node = declarator.child_by_field_name("declarator")?;
153        let name = get_node_text(name_node, source).to_string();
154        let qname = cpp_qname(parent_qname, &name);
155        let span = node_to_span(node);
156
157        let id = *next_id;
158        *next_id += 1;
159
160        let mut unit = RawCodeUnit::new(
161            CodeUnitType::Function,
162            Language::Cpp,
163            name,
164            file_path.to_path_buf(),
165            span,
166        );
167        unit.temp_id = id;
168        unit.qualified_name = qname;
169        unit.visibility = Visibility::Public;
170
171        Some(unit)
172    }
173
174    fn extract_class_or_struct(
175        &self,
176        node: tree_sitter::Node,
177        source: &str,
178        file_path: &Path,
179        units: &mut Vec<RawCodeUnit>,
180        next_id: &mut u64,
181        parent_qname: &str,
182    ) {
183        let unit_type = CodeUnitType::Type;
184        let name = match node.child_by_field_name("name") {
185            Some(n) => get_node_text(n, source).to_string(),
186            None => return, // anonymous struct/class — skip
187        };
188        let qname = cpp_qname(parent_qname, &name);
189        let span = node_to_span(node);
190
191        let id = *next_id;
192        *next_id += 1;
193
194        let mut unit = RawCodeUnit::new(
195            unit_type,
196            Language::Cpp,
197            name.clone(),
198            file_path.to_path_buf(),
199            span,
200        );
201        unit.temp_id = id;
202        unit.qualified_name = qname.clone();
203        unit.visibility = Visibility::Public;
204        units.push(unit);
205
206        // Recurse into the body to find methods
207        if let Some(body) = node.child_by_field_name("body") {
208            self.extract_class_members(body, source, file_path, units, next_id, &qname);
209        }
210    }
211
212    fn extract_class_members(
213        &self,
214        body: tree_sitter::Node,
215        source: &str,
216        file_path: &Path,
217        units: &mut Vec<RawCodeUnit>,
218        next_id: &mut u64,
219        parent_qname: &str,
220    ) {
221        let mut cursor = body.walk();
222        for child in body.children(&mut cursor) {
223            match child.kind() {
224                "function_definition" => {
225                    if let Some(unit) =
226                        self.extract_function(child, source, file_path, parent_qname, next_id)
227                    {
228                        units.push(unit);
229                    }
230                }
231                "declaration" | "field_declaration" => {
232                    // Check if it's a method declaration inside a class
233                    if let Some(unit) =
234                        self.extract_declaration(child, source, file_path, parent_qname, next_id)
235                    {
236                        units.push(unit);
237                    }
238                }
239                "template_declaration" => {
240                    self.extract_class_members(
241                        child,
242                        source,
243                        file_path,
244                        units,
245                        next_id,
246                        parent_qname,
247                    );
248                }
249                // Nested classes/structs
250                "class_specifier" => {
251                    self.extract_class_or_struct(
252                        child,
253                        source,
254                        file_path,
255                        units,
256                        next_id,
257                        parent_qname,
258                    );
259                }
260                "struct_specifier" => {
261                    self.extract_class_or_struct(
262                        child,
263                        source,
264                        file_path,
265                        units,
266                        next_id,
267                        parent_qname,
268                    );
269                }
270                _ => {}
271            }
272        }
273    }
274
275    fn extract_namespace(
276        &self,
277        node: tree_sitter::Node,
278        source: &str,
279        file_path: &Path,
280        units: &mut Vec<RawCodeUnit>,
281        next_id: &mut u64,
282        parent_qname: &str,
283    ) {
284        let name = node
285            .child_by_field_name("name")
286            .map(|n| get_node_text(n, source).to_string())
287            .unwrap_or_else(|| "(anonymous)".to_string());
288        let qname = cpp_qname(parent_qname, &name);
289        let span = node_to_span(node);
290
291        let id = *next_id;
292        *next_id += 1;
293
294        let mut unit = RawCodeUnit::new(
295            CodeUnitType::Module,
296            Language::Cpp,
297            name,
298            file_path.to_path_buf(),
299            span,
300        );
301        unit.temp_id = id;
302        unit.qualified_name = qname.clone();
303        unit.visibility = Visibility::Public;
304        units.push(unit);
305
306        // Recurse into namespace body
307        if let Some(body) = node.child_by_field_name("body") {
308            self.extract_from_node(body, source, file_path, units, next_id, &qname);
309        }
310    }
311
312    fn extract_enum(
313        &self,
314        node: tree_sitter::Node,
315        source: &str,
316        file_path: &Path,
317        parent_qname: &str,
318        next_id: &mut u64,
319    ) -> Option<RawCodeUnit> {
320        let name_node = node.child_by_field_name("name")?;
321        let name = get_node_text(name_node, source).to_string();
322        let qname = cpp_qname(parent_qname, &name);
323        let span = node_to_span(node);
324
325        let id = *next_id;
326        *next_id += 1;
327
328        let mut unit = RawCodeUnit::new(
329            CodeUnitType::Type,
330            Language::Cpp,
331            name,
332            file_path.to_path_buf(),
333            span,
334        );
335        unit.temp_id = id;
336        unit.qualified_name = qname;
337        unit.visibility = Visibility::Public;
338
339        Some(unit)
340    }
341
342    fn extract_include(
343        &self,
344        node: tree_sitter::Node,
345        source: &str,
346        file_path: &Path,
347        parent_qname: &str,
348        next_id: &mut u64,
349    ) -> Option<RawCodeUnit> {
350        let path_node = node.child_by_field_name("path")?;
351        let include_path = get_node_text(path_node, source).to_string();
352        let span = node_to_span(node);
353
354        let id = *next_id;
355        *next_id += 1;
356
357        let mut unit = RawCodeUnit::new(
358            CodeUnitType::Import,
359            Language::Cpp,
360            include_path,
361            file_path.to_path_buf(),
362            span,
363        );
364        unit.temp_id = id;
365        unit.qualified_name = cpp_qname(parent_qname, "include");
366
367        Some(unit)
368    }
369
370    /// Extract the name from a function_definition node.
371    /// Handles plain functions, qualified names (Foo::bar), and destructors (~Foo).
372    fn function_name(&self, node: tree_sitter::Node, source: &str) -> Option<String> {
373        let declarator = node.child_by_field_name("declarator")?;
374        self.declarator_name(declarator, source)
375    }
376
377    /// Extract the name from a declarator node.
378    fn declarator_name(&self, node: tree_sitter::Node, source: &str) -> Option<String> {
379        declarator_name_inner(node, source)
380    }
381}
382
383impl LanguageParser for CppParser {
384    fn extract_units(
385        &self,
386        tree: &tree_sitter::Tree,
387        source: &str,
388        file_path: &Path,
389    ) -> AcbResult<Vec<RawCodeUnit>> {
390        let mut units = Vec::new();
391        let mut next_id = 0u64;
392
393        let module_name = file_path
394            .file_stem()
395            .and_then(|s| s.to_str())
396            .unwrap_or("unknown")
397            .to_string();
398
399        let root_span = node_to_span(tree.root_node());
400        let mut module_unit = RawCodeUnit::new(
401            CodeUnitType::Module,
402            Language::Cpp,
403            module_name.clone(),
404            file_path.to_path_buf(),
405            root_span,
406        );
407        module_unit.temp_id = next_id;
408        module_unit.qualified_name = module_name.clone();
409        next_id += 1;
410        units.push(module_unit);
411
412        self.extract_from_node(
413            tree.root_node(),
414            source,
415            file_path,
416            &mut units,
417            &mut next_id,
418            &module_name,
419        );
420
421        Ok(units)
422    }
423
424    fn is_test_file(&self, path: &Path, _source: &str) -> bool {
425        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
426        name.ends_with("_test.cpp")
427            || name.ends_with("_test.cc")
428            || name.starts_with("test_")
429            || name.ends_with("_unittest.cpp")
430            || name.ends_with("_unittest.cc")
431    }
432}
433
434/// Recursively drill into declarator nodes to find the identifier.
435fn declarator_name_inner(node: tree_sitter::Node, source: &str) -> Option<String> {
436    match node.kind() {
437        "function_declarator" => {
438            let inner = node.child_by_field_name("declarator")?;
439            declarator_name_inner(inner, source)
440        }
441        "qualified_identifier" | "scoped_identifier" => {
442            // e.g. Foo::bar — return full qualified text
443            Some(get_node_text(node, source).to_string())
444        }
445        "destructor_name" => Some(get_node_text(node, source).to_string()),
446        "identifier" | "field_identifier" | "operator_name" | "template_function" => {
447            Some(get_node_text(node, source).to_string())
448        }
449        "pointer_declarator" | "reference_declarator" => {
450            // *foo or &foo — drill into child
451            let inner = node.child_by_field_name("declarator")?;
452            declarator_name_inner(inner, source)
453        }
454        _ => {
455            // Fallback: try "declarator" field
456            if let Some(inner) = node.child_by_field_name("declarator") {
457                return declarator_name_inner(inner, source);
458            }
459            None
460        }
461    }
462}
463
/// Join a parent scope and a name with the C++ scope separator `::`.
///
/// An empty `parent` yields `name` unchanged; otherwise the result is
/// `parent::name`.
fn cpp_qname(parent: &str, name: &str) -> String {
    if parent.is_empty() {
        return name.to_owned();
    }

    let mut qualified = String::with_capacity(parent.len() + 2 + name.len());
    qualified.push_str(parent);
    qualified.push_str("::");
    qualified.push_str(name);
    qualified
}
471
472/// Find the first descendant with a given kind (DFS).
473fn find_descendant_by_kind<'a>(
474    node: tree_sitter::Node<'a>,
475    kind: &str,
476) -> Option<tree_sitter::Node<'a>> {
477    if node.kind() == kind {
478        return Some(node);
479    }
480    let mut cursor = node.walk();
481    for child in node.children(&mut cursor) {
482        if let Some(found) = find_descendant_by_kind(child, kind) {
483            return Some(found);
484        }
485    }
486    None
487}