Skip to main content

normalize_languages/
cpp.rs

1//! C++ language support.
2
3use crate::c_cpp;
4use crate::external_packages::ResolvedPackage;
5use crate::{Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism};
6use std::path::{Path, PathBuf};
7use tree_sitter::Node;
8
/// C++ language support.
///
/// Zero-sized marker type; all behaviour lives in its [`Language`] implementation.
/// The derives make it printable in diagnostics and trivially copyable/constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct Cpp;
11
12impl Language for Cpp {
13    fn name(&self) -> &'static str {
14        "C++"
15    }
16    fn extensions(&self) -> &'static [&'static str] {
17        &["cpp", "cc", "cxx", "hpp", "hh", "hxx"]
18    }
19    fn grammar_name(&self) -> &'static str {
20        "cpp"
21    }
22
23    fn has_symbols(&self) -> bool {
24        true
25    }
26
27    fn container_kinds(&self) -> &'static [&'static str] {
28        &["class_specifier", "struct_specifier"]
29    }
30    fn function_kinds(&self) -> &'static [&'static str] {
31        &["function_definition"]
32    }
33    fn type_kinds(&self) -> &'static [&'static str] {
34        &[
35            "class_specifier",
36            "struct_specifier",
37            "enum_specifier",
38            "type_definition",
39        ]
40    }
41    fn import_kinds(&self) -> &'static [&'static str] {
42        &["preproc_include"]
43    }
44
45    fn public_symbol_kinds(&self) -> &'static [&'static str] {
46        &["function_definition", "class_specifier", "struct_specifier"]
47    }
48
49    fn visibility_mechanism(&self) -> VisibilityMechanism {
50        VisibilityMechanism::HeaderBased // Also has public/private in classes, but header-based is primary
51    }
52    fn scope_creating_kinds(&self) -> &'static [&'static str] {
53        &[
54            "for_statement",
55            "for_range_loop",
56            "while_statement",
57            "compound_statement",
58            "lambda_expression",
59            "namespace_definition",
60        ]
61    }
62
63    fn control_flow_kinds(&self) -> &'static [&'static str] {
64        &[
65            "if_statement",
66            "for_statement",
67            "for_range_loop",
68            "while_statement",
69            "do_statement",
70            "switch_statement",
71            "return_statement",
72            "break_statement",
73            "continue_statement",
74            "throw_statement",
75            "goto_statement",
76            "try_statement",
77        ]
78    }
79
80    fn complexity_nodes(&self) -> &'static [&'static str] {
81        &[
82            "if_statement",
83            "for_statement",
84            "for_range_loop",
85            "while_statement",
86            "do_statement",
87            "switch_statement",
88            "case_statement",
89            "try_statement",
90            "catch_clause",
91            "throw_statement",
92            "&&",
93            "||",
94            "conditional_expression",
95        ]
96    }
97
98    fn nesting_nodes(&self) -> &'static [&'static str] {
99        &[
100            "if_statement",
101            "for_statement",
102            "for_range_loop",
103            "while_statement",
104            "do_statement",
105            "switch_statement",
106            "try_statement",
107            "function_definition",
108            "class_specifier",
109            "struct_specifier",
110            "namespace_definition",
111            "lambda_expression",
112        ]
113    }
114
115    fn signature_suffix(&self) -> &'static str {
116        " {}"
117    }
118
119    fn extract_function(&self, node: &Node, content: &str, in_container: bool) -> Option<Symbol> {
120        let declarator = node.child_by_field_name("declarator")?;
121        let name = find_identifier(&declarator, content)?;
122
123        Some(Symbol {
124            name: name.to_string(),
125            kind: if in_container {
126                SymbolKind::Method
127            } else {
128                SymbolKind::Function
129            },
130            signature: name.to_string(),
131            docstring: None,
132            attributes: Vec::new(),
133            start_line: node.start_position().row + 1,
134            end_line: node.end_position().row + 1,
135            visibility: Visibility::Public,
136            children: Vec::new(),
137            is_interface_impl: false,
138            implements: Vec::new(),
139        })
140    }
141
142    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol> {
143        let name = self.node_name(node, content)?;
144        let kind = if node.kind() == "class_specifier" {
145            SymbolKind::Class
146        } else {
147            SymbolKind::Struct
148        };
149
150        Some(Symbol {
151            name: name.to_string(),
152            kind,
153            signature: format!("{} {}", kind.as_str(), name),
154            docstring: None,
155            attributes: Vec::new(),
156            start_line: node.start_position().row + 1,
157            end_line: node.end_position().row + 1,
158            visibility: Visibility::Public,
159            children: Vec::new(),
160            is_interface_impl: false,
161            implements: Vec::new(),
162        })
163    }
164
165    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol> {
166        self.extract_container(node, content)
167    }
168
169    fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
170        None
171    }
172
173    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
174        Vec::new()
175    }
176
177    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
178        if node.kind() != "preproc_include" {
179            return Vec::new();
180        }
181
182        let line = node.start_position().row + 1;
183        let mut cursor = node.walk();
184        for child in node.children(&mut cursor) {
185            if child.kind() == "string_literal" || child.kind() == "system_lib_string" {
186                let text = &content[child.byte_range()];
187                let module = text
188                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
189                    .to_string();
190                let is_relative = text.starts_with('"');
191                return vec![Import {
192                    module,
193                    names: Vec::new(),
194                    alias: None,
195                    is_wildcard: false,
196                    is_relative,
197                    line,
198                }];
199            }
200        }
201        Vec::new()
202    }
203
204    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
205        // C++ uses #include, no multi-imports
206        if import.module.starts_with('<') || import.module.ends_with('>') {
207            format!("#include {}", import.module)
208        } else {
209            format!("#include \"{}\"", import.module)
210        }
211    }
212
213    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
214        let kind = match node.kind() {
215            "function_definition" => SymbolKind::Function,
216            "class_specifier" => SymbolKind::Class,
217            "struct_specifier" => SymbolKind::Struct,
218            _ => return Vec::new(),
219        };
220
221        if let Some(name) = self.node_name(node, content) {
222            vec![Export {
223                name: name.to_string(),
224                kind,
225                line: node.start_position().row + 1,
226            }]
227        } else {
228            Vec::new()
229        }
230    }
231
232    fn is_public(&self, _node: &Node, _content: &str) -> bool {
233        true // Header-based visibility
234    }
235
236    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
237        Visibility::Public
238    }
239
240    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
241        let name = symbol.name.as_str();
242        match symbol.kind {
243            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
244            crate::SymbolKind::Module => name == "tests" || name == "test",
245            _ => false,
246        }
247    }
248
249    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
250        None
251    }
252
253    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
254        node.child_by_field_name("body")
255    }
256
257    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
258        false
259    }
260
261    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
262        if let Some(name_node) = node.child_by_field_name("name") {
263            return Some(&content[name_node.byte_range()]);
264        }
265        if let Some(declarator) = node.child_by_field_name("declarator") {
266            return find_identifier(&declarator, content);
267        }
268        None
269    }
270
271    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
272        let ext = path.extension()?.to_str()?;
273        if !["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h"].contains(&ext) {
274            return None;
275        }
276        Some(path.to_string_lossy().to_string())
277    }
278
279    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
280        vec![module.to_string()]
281    }
282
283    fn is_stdlib_import(&self, include: &str, _project_root: &Path) -> bool {
284        // C++ standard library headers (no extension)
285        let stdlib = [
286            "iostream",
287            "vector",
288            "string",
289            "map",
290            "set",
291            "algorithm",
292            "memory",
293            "utility",
294            "functional",
295            "iterator",
296            "numeric",
297            "cstdio",
298            "cstdlib",
299            "cstring",
300            "cmath",
301            "climits",
302        ];
303        stdlib.contains(&include)
304    }
305
306    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
307        None
308    }
309
310    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
311        c_cpp::find_cpp_include_paths().into_iter().next()
312    }
313
314    fn package_module_name(&self, entry_name: &str) -> String {
315        entry_name.to_string()
316    }
317
318    fn discover_packages(&self, source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
319        self.discover_recursive_packages(&source.path, &source.path)
320    }
321
322    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
323        if path.is_file() {
324            Some(path.to_path_buf())
325        } else {
326            None
327        }
328    }
329
330    // === Import Resolution ===
331
332    fn lang_key(&self) -> &'static str {
333        "cpp"
334    }
335
336    fn resolve_local_import(
337        &self,
338        include: &str,
339        current_file: &Path,
340        _project_root: &Path,
341    ) -> Option<PathBuf> {
342        // Strip quotes if present
343        let header = include
344            .trim_start_matches('"')
345            .trim_end_matches('"')
346            .trim_start_matches('<')
347            .trim_end_matches('>');
348
349        let current_dir = current_file.parent()?;
350
351        // Try relative to current file's directory
352        let relative = current_dir.join(header);
353        if relative.is_file() {
354            return Some(relative);
355        }
356
357        // Try with common extensions if none specified
358        if !header.contains('.') {
359            for ext in &[".h", ".hpp", ".hxx", ".hh"] {
360                let with_ext = current_dir.join(format!("{}{}", header, ext));
361                if with_ext.is_file() {
362                    return Some(with_ext);
363                }
364            }
365        }
366
367        None
368    }
369
370    fn resolve_external_import(
371        &self,
372        include: &str,
373        _project_root: &Path,
374    ) -> Option<ResolvedPackage> {
375        let include_paths = c_cpp::find_cpp_include_paths();
376        c_cpp::resolve_cpp_include(include, &include_paths)
377    }
378
379    fn get_version(&self, _project_root: &Path) -> Option<String> {
380        c_cpp::get_gcc_version()
381    }
382
383    fn indexable_extensions(&self) -> &'static [&'static str] {
384        &["cpp", "hpp", "cc", "hh", "cxx", "hxx", "h"]
385    }
386
387    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
388        use crate::{PackageSource, PackageSourceKind};
389        c_cpp::find_cpp_include_paths()
390            .into_iter()
391            .map(|path| PackageSource {
392                name: "includes",
393                path,
394                kind: PackageSourceKind::Recursive,
395                version_specific: false,
396            })
397            .collect()
398    }
399
400    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
401        use crate::traits::skip_dotfiles;
402        if skip_dotfiles(name) {
403            return true;
404        }
405        // Skip the "bits" directory (C++ internal headers)
406        if is_dir && name == "bits" {
407            return true;
408        }
409        if is_dir {
410            return false;
411        }
412        // Check if it's a valid header: explicit extensions or extensionless stdlib headers
413        let is_header = name.ends_with(".h")
414            || name.ends_with(".hpp")
415            || name.ends_with(".hxx")
416            || name.ends_with(".hh")
417            // C++ standard library headers (no extension, like vector, iostream)
418            || (!name.contains('.') && !name.contains('-')
419                && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_'));
420        !is_header
421    }
422}
423
424fn find_identifier<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
425    if node.kind() == "identifier" || node.kind() == "field_identifier" {
426        return Some(&content[node.byte_range()]);
427    }
428    let mut cursor = node.walk();
429    for child in node.children(&mut cursor) {
430        if let Some(id) = find_identifier(&child, content) {
431            return Some(id);
432        }
433    }
434    None
435}
436
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Node kinds that exist in the C++ grammar but are deliberately not
    /// referenced by any trait method of [`Cpp`].
    #[rustfmt::skip]
    const DOCUMENTED_UNUSED: &[&str] = &[
        // STRUCTURAL (C++ adds many to C)
        "access_specifier",        // public:, private:
        "base_class_clause",       // : public Base
        "bitfield_clause",         // : width
        "condition_clause",        // if condition
        "declaration",             // declaration
        "declaration_list",        // decl list
        "default_method_clause",   // = default
        "delete_method_clause",    // = delete
        "dependent_type",          // typename T::X
        "destructor_name",         // ~Foo
        "enumerator",              // enum value
        "enumerator_list",         // enum body
        "field_declaration",       // struct field
        "field_declaration_list",  // struct body
        "field_expression",        // foo.bar
        "field_identifier",        // field name
        "identifier",              // too common
        "init_statement",          // for init
        "linkage_specification",   // extern "C"
        "module_name",             // module name
        "module_partition",        // module partition
        "namespace_identifier",    // namespace name
        "nested_namespace_specifier", // ns1::ns2
        "operator_name",           // operator+
        "parameter_declaration",   // param decl
        "primitive_type",          // int, char
        "pure_virtual_clause",     // = 0
        "ref_qualifier",           // &, &&
        "sized_type_specifier",    // unsigned int
        "statement_identifier",    // label name
        "static_assert_declaration", // static_assert
        "storage_class_specifier", // static, extern
        "structured_binding_declarator", // auto [a, b]
        "type_descriptor",         // type desc
        "type_identifier",         // type name
        "type_parameter_declaration", // template param
        "type_qualifier",          // const, volatile
        "union_specifier",         // union
        "using_declaration",       // using ns::name
        "variadic_parameter_declaration", // T...
        "variadic_type_parameter_declaration", // typename...
        "virtual_specifier",       // override, final

        // CLAUSE
        "else_clause",             // else
        "noexcept",                // noexcept

        // EXPRESSION (C++ adds many to C)
        "alignof_expression",      // alignof(T)
        "assignment_expression",   // x = y
        "binary_expression",       // a + b
        "call_expression",         // foo()
        "cast_expression",         // (T)x
        "co_await_expression",     // co_await x
        "co_return_statement",     // co_return
        "co_yield_statement",      // co_yield x
        "comma_expression",        // a, b
        "compound_literal_expression", // (T){...}
        "delete_expression",       // delete x
        "extension_expression",    // __extension__
        "fold_expression",         // (... + args)
        "generic_expression",      // _Generic
        "gnu_asm_expression",      // asm()
        "new_expression",          // new T
        "offsetof_expression",     // offsetof
        "parenthesized_expression",// (expr)
        "pointer_expression",      // *p, &x
        "reflect_expression",      // reflexpr
        "sizeof_expression",       // sizeof(T)
        "splice_expression",       // [:expr:]
        "subscript_expression",    // arr[i]
        "unary_expression",        // -x, !x
        "update_expression",       // x++

        // TEMPLATE
        "template_declaration",    // template<>
        "template_function",       // template func
        "template_method",         // template method
        "template_template_parameter_declaration", // template template
        "template_type",           // T<U>

        // LAMBDA
        "lambda_capture_initializer", // [x = y]
        "lambda_capture_specifier",   // [=], [&]
        "lambda_declarator",       // lambda params
        "lambda_default_capture",  // =, &
        "lambda_specifier",        // mutable

        // FUNCTION
        "abstract_function_declarator", // abstract func
        "explicit_function_specifier", // explicit
        "explicit_object_parameter_declaration", // this param
        "function_declarator",     // func decl
        "operator_cast",           // operator T()
        "optional_parameter_declaration", // param = default
        "optional_type_parameter_declaration", // T = U
        "placeholder_type_specifier", // auto, decltype(auto)
        "pointer_type_declarator", // ptr declarator
        "trailing_return_type",    // -> T

        // CONCEPTS/REQUIRES
        "concept_definition",      // concept
        "requires_clause",         // requires
        "requires_expression",     // requires {}
        "type_requirement",        // typename T

        // MODULE
        "export_declaration",      // export
        "global_module_fragment_declaration", // module;
        "import_declaration",      // import
        "module_declaration",      // module
        "private_module_fragment_declaration", // module :private

        // PREPROCESSOR
        "preproc_elif",            // #elif
        "preproc_elifdef",         // #elifdef
        "preproc_else",            // #else
        "preproc_function_def",    // function macro
        "preproc_if",              // #if
        "preproc_ifdef",           // #ifdef

        // SPLICE
        "splice_specifier",        // [: :] specifier
        "splice_type_specifier",   // [: :] type

        // OTHER
        "alias_declaration",       // using X = Y
        "alignas_qualifier",       // alignas
        "attribute_declaration",   // [[attr]]
        "attribute_specifier",     // __attribute__
        "attributed_statement",    // stmt with attr
        "consteval_block_declaration", // consteval
        "decltype",                // decltype
        "expansion_statement",     // pack expansion stmt
        "expression_statement",    // expr;
        "friend_declaration",      // friend
        "gnu_asm_qualifier",       // asm qualifiers
        "labeled_statement",       // label:
        "namespace_alias_definition", // namespace X = Y
        "qualified_identifier",    // ns::name
        "throw_specifier",         // throw()

        // MS EXTENSIONS
        "ms_based_modifier",       // __based
        "ms_call_modifier",        // __cdecl
        "ms_declspec_modifier",    // __declspec
        "ms_pointer_modifier",     // __ptr32
        "ms_restrict_modifier",    // __restrict
        "ms_signed_ptr_modifier",  // __sptr
        "ms_unaligned_ptr_modifier", // __unaligned
        "ms_unsigned_ptr_modifier", // __uptr

        // SEH
        "seh_except_clause",       // __except
        "seh_finally_clause",      // __finally
        "seh_leave_statement",     // __leave
        "seh_try_statement",       // __try
    ];

    /// Checks the documented-unused list against the C++ grammar.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    #[test]
    fn unused_node_kinds_audit() {
        let audit = validate_unused_kinds_audit(&Cpp, DOCUMENTED_UNUSED);
        audit.expect("C++ unused node kinds audit failed");
    }
}