Skip to main content

normalize_languages/
cpp.rs

1//! C++ language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
4use tree_sitter::Node;
5
/// Marker type carrying the C++ implementation of [`Language`].
///
/// It holds no data; all behaviour lives in the trait implementations below.
pub struct Cpp;
8
9impl Language for Cpp {
10    fn name(&self) -> &'static str {
11        "C++"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["cpp", "cc", "cxx", "hpp", "hh", "hxx"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "cpp"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn signature_suffix(&self) -> &'static str {
25        " {}"
26    }
27
28    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
29        let mut prev = node.prev_sibling();
30        while let Some(sibling) = prev {
31            if sibling.kind() == "comment" {
32                let text = &content[sibling.byte_range()];
33                if text.starts_with("/**") {
34                    let lines: Vec<&str> = text
35                        .strip_prefix("/**")
36                        .unwrap_or(text)
37                        .strip_suffix("*/")
38                        .unwrap_or(text)
39                        .lines()
40                        .map(|l| l.trim().strip_prefix('*').unwrap_or(l).trim())
41                        .filter(|l| !l.is_empty())
42                        .collect();
43                    if !lines.is_empty() {
44                        return Some(lines.join(" "));
45                    }
46                }
47                return None;
48            }
49            if sibling.kind() == "template_declaration" {
50                prev = sibling.prev_sibling();
51                continue;
52            }
53            return None;
54        }
55        None
56    }
57
58    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
59        let mut attrs = Vec::new();
60        let mut cursor = node.walk();
61        for child in node.children(&mut cursor) {
62            match child.kind() {
63                "attribute_declaration" => {
64                    // [[attr]] or [[attr(args)]]
65                    let text = content[child.byte_range()].trim();
66                    attrs.push(text.to_string());
67                }
68                "attribute_specifier" => {
69                    // __attribute__((...))
70                    let text = content[child.byte_range()].trim();
71                    attrs.push(text.to_string());
72                }
73                "ms_declspec_modifier" => {
74                    // __declspec(...)
75                    let text = content[child.byte_range()].trim();
76                    attrs.push(text.to_string());
77                }
78                _ => {}
79            }
80        }
81        attrs
82    }
83
84    fn refine_kind(
85        &self,
86        node: &Node,
87        _content: &str,
88        tag_kind: crate::SymbolKind,
89    ) -> crate::SymbolKind {
90        match node.kind() {
91            "struct_specifier" => crate::SymbolKind::Struct,
92            "enum_specifier" => crate::SymbolKind::Enum,
93            _ => tag_kind,
94        }
95    }
96
97    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
98        let mut implements = Vec::new();
99        let mut cursor = node.walk();
100        for child in node.children(&mut cursor) {
101            if child.kind() == "base_class_clause" {
102                let mut bc = child.walk();
103                for base in child.children(&mut bc) {
104                    if base.kind() == "type_identifier" {
105                        implements.push(content[base.byte_range()].to_string());
106                    }
107                }
108            }
109        }
110        crate::ImplementsInfo {
111            is_interface: false,
112            implements,
113        }
114    }
115
116    fn build_signature(&self, node: &Node, content: &str) -> String {
117        match node.kind() {
118            "function_definition" => {
119                if let Some(declarator) = node.child_by_field_name("declarator")
120                    && let Some(name) = find_identifier(&declarator, content)
121                {
122                    return name.to_string();
123                }
124                let text = &content[node.byte_range()];
125                text.lines().next().unwrap_or(text).trim().to_string()
126            }
127            "class_specifier" => {
128                let name = self.node_name(node, content).unwrap_or("");
129                format!("class {}", name)
130            }
131            "struct_specifier" => {
132                let name = self.node_name(node, content).unwrap_or("");
133                format!("struct {}", name)
134            }
135            _ => {
136                let text = &content[node.byte_range()];
137                text.lines().next().unwrap_or(text).trim().to_string()
138            }
139        }
140    }
141
142    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
143        if node.kind() != "preproc_include" {
144            return Vec::new();
145        }
146
147        let line = node.start_position().row + 1;
148        let mut cursor = node.walk();
149        for child in node.children(&mut cursor) {
150            if child.kind() == "string_literal" || child.kind() == "system_lib_string" {
151                let text = &content[child.byte_range()];
152                let module = text
153                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
154                    .to_string();
155                let is_relative = text.starts_with('"');
156                return vec![Import {
157                    module,
158                    names: Vec::new(),
159                    alias: None,
160                    is_wildcard: false,
161                    is_relative,
162                    line,
163                }];
164            }
165        }
166        Vec::new()
167    }
168
169    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
170        // C++ uses #include, no multi-imports
171        if import.module.starts_with('<') || import.module.ends_with('>') {
172            format!("#include {}", import.module)
173        } else {
174            format!("#include \"{}\"", import.module)
175        }
176    }
177
178    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
179        let name = symbol.name.as_str();
180        match symbol.kind {
181            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
182            crate::SymbolKind::Module => name == "tests" || name == "test",
183            _ => false,
184        }
185    }
186
187    fn test_file_globs(&self) -> &'static [&'static str] {
188        &[
189            "**/test_*.cpp",
190            "**/*_test.cpp",
191            "**/test_*.cc",
192            "**/*_test.cc",
193            "**/tests/**/*.cpp",
194            "**/tests/**/*.cc",
195        ]
196    }
197
198    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
199        node.child_by_field_name("body")
200    }
201
202    fn analyze_container_body(
203        &self,
204        body_node: &Node,
205        content: &str,
206        inner_indent: &str,
207    ) -> Option<ContainerBody> {
208        crate::body::analyze_brace_body(body_node, content, inner_indent)
209    }
210
211    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
212        // C++ visibility is determined by the most recent access_specifier in the enclosing
213        // class body. Classes default to private, structs to public.
214        // Walk backward through siblings to find the nearest access_specifier.
215        let mut prev = node.prev_sibling();
216        while let Some(sibling) = prev {
217            if sibling.kind() == "access_specifier" {
218                let spec = content[sibling.byte_range()].trim().trim_end_matches(':');
219                return match spec {
220                    "public" => Visibility::Public,
221                    "protected" => Visibility::Protected,
222                    "private" => Visibility::Private,
223                    _ => Visibility::Public,
224                };
225            }
226            prev = sibling.prev_sibling();
227        }
228        // No access_specifier found: check if parent is struct (public) or class (private).
229        // If we can't determine, default to Public (safe for analysis).
230        if node
231            .parent()
232            .and_then(|p| p.parent())
233            .map(|g| g.kind() == "class_specifier")
234            .unwrap_or(false)
235        {
236            return Visibility::Private;
237        }
238        Visibility::Public
239    }
240
241    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
242        if let Some(name_node) = node.child_by_field_name("name") {
243            return Some(&content[name_node.byte_range()]);
244        }
245        if let Some(declarator) = node.child_by_field_name("declarator") {
246            return find_identifier(&declarator, content);
247        }
248        None
249    }
250}
251
252impl LanguageSymbols for Cpp {}
253
254fn find_identifier<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
255    if node.kind() == "identifier" || node.kind() == "field_identifier" {
256        return Some(&content[node.byte_range()]);
257    }
258    let mut cursor = node.walk();
259    for child in node.children(&mut cursor) {
260        if let Some(id) = find_identifier(&child, content) {
261            return Some(id);
262        }
263    }
264    None
265}
266
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Audit of C++ grammar node kinds that are not used in trait methods.
    ///
    /// Each entry records a kind that is knowingly left unused (or whose use the audit
    /// cannot detect). Run `cross_check_node_kinds` in registry.rs to list every
    /// potentially useful kind.
    #[test]
    fn unused_node_kinds_audit() {
        #[rustfmt::skip]
        const KNOWN_UNUSED: &[&str] = &[
            // STRUCTURAL (C++ adds many to C)
            "access_specifier",        // public:, private: — matched by get_visibility
            "base_class_clause",       // : public Base — matched by extract_implements
            "bitfield_clause",         // : width
            "condition_clause",        // if condition
            "declaration_list",        // decl list
            "default_method_clause",   // = default
            "delete_method_clause",    // = delete
            "dependent_type",          // typename T::X
            "destructor_name",         // ~Foo
            "enumerator",              // enum value
            "enumerator_list",         // enum body
            "field_declaration",       // struct field
            "field_declaration_list",  // struct body
            "field_expression",        // foo.bar
            "field_identifier",        // field name — matched by find_identifier
            "identifier",              // too common — matched by find_identifier
            "init_statement",          // for init
            "linkage_specification",   // extern "C"
            "module_name",             // module name
            "module_partition",        // module partition
            "namespace_identifier",    // namespace name
            "nested_namespace_specifier", // ns1::ns2
            "operator_name",           // operator+
            "parameter_declaration",   // param decl
            "primitive_type",          // int, char
            "pure_virtual_clause",     // = 0
            "ref_qualifier",           // &, &&
            "sized_type_specifier",    // unsigned int
            "statement_identifier",    // label name
            "static_assert_declaration", // static_assert
            "storage_class_specifier", // static, extern
            "structured_binding_declarator", // auto [a, b]
            "type_descriptor",         // type desc
            "type_identifier",         // type name — matched by extract_implements
            "type_parameter_declaration", // template param
            "type_qualifier",          // const, volatile
            "union_specifier",         // union
            "using_declaration",       // using ns::name
            "variadic_parameter_declaration", // T...
            "variadic_type_parameter_declaration", // typename...
            "virtual_specifier",       // override, final

            // CLAUSE
            "else_clause",             // else
            "noexcept",                // noexcept

            // EXPRESSION (C++ adds many to C)
            "alignof_expression",      // alignof(T)
            "assignment_expression",   // x = y
            "binary_expression",       // a + b
            "call_expression",         // foo()
            "cast_expression",         // (T)x
            "co_await_expression",     // co_await x
            "co_return_statement",     // co_return
            "co_yield_statement",      // co_yield x
            "comma_expression",        // a, b
            "compound_literal_expression", // (T){...}
            "delete_expression",       // delete x
            "extension_expression",    // __extension__
            "fold_expression",         // (... + args)
            "generic_expression",      // _Generic
            "gnu_asm_expression",      // asm()
            "new_expression",          // new T
            "offsetof_expression",     // offsetof
            "parenthesized_expression",// (expr)
            "pointer_expression",      // *p, &x
            "reflect_expression",      // reflexpr
            "sizeof_expression",       // sizeof(T)
            "splice_expression",       // [:expr:]
            "subscript_expression",    // arr[i]
            "unary_expression",        // -x, !x
            "update_expression",       // x++

            // TEMPLATE
            "template_declaration",    // template<> — matched by extract_docstring
            "template_function",       // template func
            "template_method",         // template method
            "template_template_parameter_declaration", // template template
            "template_type",           // T<U>

            // LAMBDA
            "lambda_capture_initializer", // [x = y]
            "lambda_capture_specifier",   // [=], [&]
            "lambda_declarator",       // lambda params
            "lambda_default_capture",  // =, &
            "lambda_specifier",        // mutable

            // FUNCTION
            "abstract_function_declarator", // abstract func
            "explicit_function_specifier", // explicit
            "explicit_object_parameter_declaration", // this param
            "operator_cast",           // operator T()
            "optional_parameter_declaration", // param = default
            "optional_type_parameter_declaration", // T = U
            "placeholder_type_specifier", // auto, decltype(auto)
            "pointer_type_declarator", // ptr declarator
            "trailing_return_type",    // -> T

            // CONCEPTS/REQUIRES
            "concept_definition",      // concept
            "requires_clause",         // requires
            "requires_expression",     // requires {}
            "type_requirement",        // typename T

            // MODULE
            "export_declaration",      // export
            "global_module_fragment_declaration", // module;
            "import_declaration",      // import
            "module_declaration",      // module
            "private_module_fragment_declaration", // module :private

            // PREPROCESSOR
            "preproc_elif",            // #elif
            "preproc_elifdef",         // #elifdef
            "preproc_else",            // #else
            "preproc_function_def",    // function macro
            "preproc_if",              // #if
            "preproc_ifdef",           // #ifdef

            // SPLICE
            "splice_specifier",        // [: :] specifier
            "splice_type_specifier",   // [: :] type

            // OTHER
            "alias_declaration",       // using X = Y
            "alignas_qualifier",       // alignas
            "attribute_declaration",   // [[attr]] — used by extract_attributes (not detectable by audit)
            "attribute_specifier",     // __attribute__ — used by extract_attributes
            "attributed_statement",    // stmt with attr
            "consteval_block_declaration", // consteval
            "decltype",                // decltype
            "expansion_statement",     // pack expansion stmt
            "expression_statement",    // expr;
            "friend_declaration",      // friend
            "gnu_asm_qualifier",       // asm qualifiers
            "labeled_statement",       // label:
            "namespace_alias_definition", // namespace X = Y
            "qualified_identifier",    // ns::name
            "throw_specifier",         // throw()

            // MS EXTENSIONS
            "ms_based_modifier",       // __based
            "ms_call_modifier",        // __cdecl
            "ms_declspec_modifier",    // __declspec — used by extract_attributes
            "ms_pointer_modifier",     // __ptr32
            "ms_restrict_modifier",    // __restrict
            "ms_signed_ptr_modifier",  // __sptr
            "ms_unaligned_ptr_modifier", // __unaligned
            "ms_unsigned_ptr_modifier", // __uptr

            // SEH
            "seh_except_clause",       // __except
            "seh_finally_clause",      // __finally
            "seh_leave_statement",     // __leave
            "seh_try_statement",       // __try

            // covered by tags.scm
            "function_definition",
            "case_statement",
            "for_range_loop",
            "conditional_expression",
            "do_statement",
            "if_statement",
            "catch_clause",
            "while_statement",
            "lambda_expression",
            "continue_statement",
            "switch_statement",
            "throw_statement",
            "try_statement",
            "return_statement",
            "break_statement",
            "compound_statement",
            "namespace_definition",
            "goto_statement",
            "for_statement",
        ];

        validate_unused_kinds_audit(&Cpp, KNOWN_UNUSED)
            .expect("C++ unused node kinds audit failed");
    }
}