Skip to main content

agentic_codebase/parse/
cpp.rs

1//! C++ parsing using tree-sitter.
2//!
3//! Extracts functions, classes, structs, namespaces, enums, templates, includes.
4
5use std::path::Path;
6
7use crate::types::{AcbResult, CodeUnitType, Language, Visibility};
8
9use super::treesitter::{get_node_text, node_to_span};
10use super::{LanguageParser, RawCodeUnit};
11
/// C++ language parser.
///
/// A unit struct with no fields: the extraction methods receive the syntax
/// tree, source text and file path as arguments, so the value itself holds
/// no state and is free to copy.
#[derive(Debug, Clone, Copy)]
pub struct CppParser;
14
15impl Default for CppParser {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl CppParser {
22    /// Create a new C++ parser.
23    pub fn new() -> Self {
24        Self
25    }
26
27    fn extract_from_node(
28        &self,
29        node: tree_sitter::Node,
30        source: &str,
31        file_path: &Path,
32        units: &mut Vec<RawCodeUnit>,
33        next_id: &mut u64,
34        parent_qname: &str,
35    ) {
36        let mut cursor = node.walk();
37        for child in node.children(&mut cursor) {
38            match child.kind() {
39                "function_definition" => {
40                    if let Some(unit) =
41                        self.extract_function(child, source, file_path, parent_qname, next_id)
42                    {
43                        units.push(unit);
44                    }
45                }
46                "declaration" => {
47                    // A declaration can contain a function declarator (forward decl / prototype)
48                    // or a variable. We extract function prototypes.
49                    if let Some(unit) =
50                        self.extract_declaration(child, source, file_path, parent_qname, next_id)
51                    {
52                        units.push(unit);
53                    }
54                }
55                "class_specifier" => {
56                    self.extract_class_or_struct(
57                        child,
58                        source,
59                        file_path,
60                        units,
61                        next_id,
62                        parent_qname,
63                    );
64                }
65                "struct_specifier" => {
66                    self.extract_class_or_struct(
67                        child,
68                        source,
69                        file_path,
70                        units,
71                        next_id,
72                        parent_qname,
73                    );
74                }
75                "namespace_definition" => {
76                    self.extract_namespace(
77                        child,
78                        source,
79                        file_path,
80                        units,
81                        next_id,
82                        parent_qname,
83                    );
84                }
85                "enum_specifier" => {
86                    if let Some(unit) =
87                        self.extract_enum(child, source, file_path, parent_qname, next_id)
88                    {
89                        units.push(unit);
90                    }
91                }
92                "template_declaration" => {
93                    // Recurse into the template body to find the actual declaration
94                    self.extract_from_node(
95                        child,
96                        source,
97                        file_path,
98                        units,
99                        next_id,
100                        parent_qname,
101                    );
102                }
103                "preproc_include" => {
104                    if let Some(unit) =
105                        self.extract_include(child, source, file_path, parent_qname, next_id)
106                    {
107                        units.push(unit);
108                    }
109                }
110                // Top-level declarations wrapped in linkage_specification (extern "C" { ... })
111                "linkage_specification" => {
112                    self.extract_from_node(
113                        child,
114                        source,
115                        file_path,
116                        units,
117                        next_id,
118                        parent_qname,
119                    );
120                }
121                _ => {}
122            }
123        }
124    }
125
126    fn extract_function(
127        &self,
128        node: tree_sitter::Node,
129        source: &str,
130        file_path: &Path,
131        parent_qname: &str,
132        next_id: &mut u64,
133    ) -> Option<RawCodeUnit> {
134        let name = self.function_name(node, source)?;
135        let qname = cpp_qname(parent_qname, &name);
136        let span = node_to_span(node);
137
138        let id = *next_id;
139        *next_id += 1;
140
141        let is_test = name.starts_with("TEST") || name.starts_with("test_");
142        let unit_type = if is_test {
143            CodeUnitType::Test
144        } else {
145            CodeUnitType::Function
146        };
147
148        let mut unit =
149            RawCodeUnit::new(unit_type, Language::Cpp, name, file_path.to_path_buf(), span);
150        unit.temp_id = id;
151        unit.qualified_name = qname;
152        unit.visibility = Visibility::Public;
153
154        Some(unit)
155    }
156
157    /// Extract a function prototype from a top-level declaration node.
158    fn extract_declaration(
159        &self,
160        node: tree_sitter::Node,
161        source: &str,
162        file_path: &Path,
163        parent_qname: &str,
164        next_id: &mut u64,
165    ) -> Option<RawCodeUnit> {
166        // Only extract if the declaration contains a function_declarator
167        let declarator = find_descendant_by_kind(node, "function_declarator")?;
168        let name_node = declarator.child_by_field_name("declarator")?;
169        let name = get_node_text(name_node, source).to_string();
170        let qname = cpp_qname(parent_qname, &name);
171        let span = node_to_span(node);
172
173        let id = *next_id;
174        *next_id += 1;
175
176        let mut unit = RawCodeUnit::new(
177            CodeUnitType::Function,
178            Language::Cpp,
179            name,
180            file_path.to_path_buf(),
181            span,
182        );
183        unit.temp_id = id;
184        unit.qualified_name = qname;
185        unit.visibility = Visibility::Public;
186
187        Some(unit)
188    }
189
190    fn extract_class_or_struct(
191        &self,
192        node: tree_sitter::Node,
193        source: &str,
194        file_path: &Path,
195        units: &mut Vec<RawCodeUnit>,
196        next_id: &mut u64,
197        parent_qname: &str,
198    ) {
199        let unit_type = CodeUnitType::Type;
200        let name = match node.child_by_field_name("name") {
201            Some(n) => get_node_text(n, source).to_string(),
202            None => return, // anonymous struct/class — skip
203        };
204        let qname = cpp_qname(parent_qname, &name);
205        let span = node_to_span(node);
206
207        let id = *next_id;
208        *next_id += 1;
209
210        let mut unit = RawCodeUnit::new(
211            unit_type,
212            Language::Cpp,
213            name.clone(),
214            file_path.to_path_buf(),
215            span,
216        );
217        unit.temp_id = id;
218        unit.qualified_name = qname.clone();
219        unit.visibility = Visibility::Public;
220        units.push(unit);
221
222        // Recurse into the body to find methods
223        if let Some(body) = node.child_by_field_name("body") {
224            self.extract_class_members(body, source, file_path, units, next_id, &qname);
225        }
226    }
227
228    fn extract_class_members(
229        &self,
230        body: tree_sitter::Node,
231        source: &str,
232        file_path: &Path,
233        units: &mut Vec<RawCodeUnit>,
234        next_id: &mut u64,
235        parent_qname: &str,
236    ) {
237        let mut cursor = body.walk();
238        for child in body.children(&mut cursor) {
239            match child.kind() {
240                "function_definition" => {
241                    if let Some(unit) =
242                        self.extract_function(child, source, file_path, parent_qname, next_id)
243                    {
244                        units.push(unit);
245                    }
246                }
247                "declaration" | "field_declaration" => {
248                    // Check if it's a method declaration inside a class
249                    if let Some(unit) = self.extract_declaration(
250                        child,
251                        source,
252                        file_path,
253                        parent_qname,
254                        next_id,
255                    ) {
256                        units.push(unit);
257                    }
258                }
259                "template_declaration" => {
260                    self.extract_class_members(
261                        child,
262                        source,
263                        file_path,
264                        units,
265                        next_id,
266                        parent_qname,
267                    );
268                }
269                // Nested classes/structs
270                "class_specifier" => {
271                    self.extract_class_or_struct(
272                        child,
273                        source,
274                        file_path,
275                        units,
276                        next_id,
277                        parent_qname,
278                    );
279                }
280                "struct_specifier" => {
281                    self.extract_class_or_struct(
282                        child,
283                        source,
284                        file_path,
285                        units,
286                        next_id,
287                        parent_qname,
288                    );
289                }
290                _ => {}
291            }
292        }
293    }
294
295    fn extract_namespace(
296        &self,
297        node: tree_sitter::Node,
298        source: &str,
299        file_path: &Path,
300        units: &mut Vec<RawCodeUnit>,
301        next_id: &mut u64,
302        parent_qname: &str,
303    ) {
304        let name = node
305            .child_by_field_name("name")
306            .map(|n| get_node_text(n, source).to_string())
307            .unwrap_or_else(|| "(anonymous)".to_string());
308        let qname = cpp_qname(parent_qname, &name);
309        let span = node_to_span(node);
310
311        let id = *next_id;
312        *next_id += 1;
313
314        let mut unit = RawCodeUnit::new(
315            CodeUnitType::Module,
316            Language::Cpp,
317            name,
318            file_path.to_path_buf(),
319            span,
320        );
321        unit.temp_id = id;
322        unit.qualified_name = qname.clone();
323        unit.visibility = Visibility::Public;
324        units.push(unit);
325
326        // Recurse into namespace body
327        if let Some(body) = node.child_by_field_name("body") {
328            self.extract_from_node(body, source, file_path, units, next_id, &qname);
329        }
330    }
331
332    fn extract_enum(
333        &self,
334        node: tree_sitter::Node,
335        source: &str,
336        file_path: &Path,
337        parent_qname: &str,
338        next_id: &mut u64,
339    ) -> Option<RawCodeUnit> {
340        let name_node = node.child_by_field_name("name")?;
341        let name = get_node_text(name_node, source).to_string();
342        let qname = cpp_qname(parent_qname, &name);
343        let span = node_to_span(node);
344
345        let id = *next_id;
346        *next_id += 1;
347
348        let mut unit =
349            RawCodeUnit::new(CodeUnitType::Type, Language::Cpp, name, file_path.to_path_buf(), span);
350        unit.temp_id = id;
351        unit.qualified_name = qname;
352        unit.visibility = Visibility::Public;
353
354        Some(unit)
355    }
356
357    fn extract_include(
358        &self,
359        node: tree_sitter::Node,
360        source: &str,
361        file_path: &Path,
362        parent_qname: &str,
363        next_id: &mut u64,
364    ) -> Option<RawCodeUnit> {
365        let path_node = node.child_by_field_name("path")?;
366        let include_path = get_node_text(path_node, source).to_string();
367        let span = node_to_span(node);
368
369        let id = *next_id;
370        *next_id += 1;
371
372        let mut unit = RawCodeUnit::new(
373            CodeUnitType::Import,
374            Language::Cpp,
375            include_path,
376            file_path.to_path_buf(),
377            span,
378        );
379        unit.temp_id = id;
380        unit.qualified_name = cpp_qname(parent_qname, "include");
381
382        Some(unit)
383    }
384
385    /// Extract the name from a function_definition node.
386    /// Handles plain functions, qualified names (Foo::bar), and destructors (~Foo).
387    fn function_name(&self, node: tree_sitter::Node, source: &str) -> Option<String> {
388        let declarator = node.child_by_field_name("declarator")?;
389        self.declarator_name(declarator, source)
390    }
391
    /// Extract the name from a declarator node.
    ///
    /// Thin method wrapper: forwards `node` and `source` to the free function
    /// `declarator_name_inner` and returns its result unchanged.
    fn declarator_name(&self, node: tree_sitter::Node, source: &str) -> Option<String> {
        declarator_name_inner(node, source)
    }
396}
397
398impl LanguageParser for CppParser {
399    fn extract_units(
400        &self,
401        tree: &tree_sitter::Tree,
402        source: &str,
403        file_path: &Path,
404    ) -> AcbResult<Vec<RawCodeUnit>> {
405        let mut units = Vec::new();
406        let mut next_id = 0u64;
407
408        let module_name = file_path
409            .file_stem()
410            .and_then(|s| s.to_str())
411            .unwrap_or("unknown")
412            .to_string();
413
414        let root_span = node_to_span(tree.root_node());
415        let mut module_unit = RawCodeUnit::new(
416            CodeUnitType::Module,
417            Language::Cpp,
418            module_name.clone(),
419            file_path.to_path_buf(),
420            root_span,
421        );
422        module_unit.temp_id = next_id;
423        module_unit.qualified_name = module_name.clone();
424        next_id += 1;
425        units.push(module_unit);
426
427        self.extract_from_node(
428            tree.root_node(),
429            source,
430            file_path,
431            &mut units,
432            &mut next_id,
433            &module_name,
434        );
435
436        Ok(units)
437    }
438
439    fn is_test_file(&self, path: &Path, _source: &str) -> bool {
440        let name = path
441            .file_name()
442            .and_then(|n| n.to_str())
443            .unwrap_or("");
444        name.ends_with("_test.cpp")
445            || name.ends_with("_test.cc")
446            || name.starts_with("test_")
447            || name.ends_with("_unittest.cpp")
448            || name.ends_with("_unittest.cc")
449    }
450}
451
452/// Recursively drill into declarator nodes to find the identifier.
453fn declarator_name_inner(node: tree_sitter::Node, source: &str) -> Option<String> {
454    match node.kind() {
455        "function_declarator" => {
456            let inner = node.child_by_field_name("declarator")?;
457            declarator_name_inner(inner, source)
458        }
459        "qualified_identifier" | "scoped_identifier" => {
460            // e.g. Foo::bar — return full qualified text
461            Some(get_node_text(node, source).to_string())
462        }
463        "destructor_name" => Some(get_node_text(node, source).to_string()),
464        "identifier" | "field_identifier" | "operator_name" | "template_function" => {
465            Some(get_node_text(node, source).to_string())
466        }
467        "pointer_declarator" | "reference_declarator" => {
468            // *foo or &foo — drill into child
469            let inner = node.child_by_field_name("declarator")?;
470            declarator_name_inner(inner, source)
471        }
472        _ => {
473            // Fallback: try "declarator" field
474            if let Some(inner) = node.child_by_field_name("declarator") {
475                return declarator_name_inner(inner, source);
476            }
477            None
478        }
479    }
480}
481
/// Join `parent` and `name` with the C++ scope separator `::`.
///
/// An empty `parent` yields `name` on its own, with no leading separator.
fn cpp_qname(parent: &str, name: &str) -> String {
    match parent {
        "" => name.to_owned(),
        scope => [scope, name].join("::"),
    }
}
489
490/// Find the first descendant with a given kind (DFS).
491fn find_descendant_by_kind<'a>(
492    node: tree_sitter::Node<'a>,
493    kind: &str,
494) -> Option<tree_sitter::Node<'a>> {
495    if node.kind() == kind {
496        return Some(node);
497    }
498    let mut cursor = node.walk();
499    for child in node.children(&mut cursor) {
500        if let Some(found) = find_descendant_by_kind(child, kind) {
501            return Some(found);
502        }
503    }
504    None
505}