Skip to main content

normalize_languages/
c.rs

1//! C language support.
2
3use crate::c_cpp;
4use crate::external_packages::ResolvedPackage;
5use crate::{Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism};
6use std::path::{Path, PathBuf};
7use tree_sitter::Node;
8
/// Zero-sized marker type that carries the C implementation of the
/// [`Language`] trait.
pub struct C;
11
12impl Language for C {
13    fn name(&self) -> &'static str {
14        "C"
15    }
16    fn extensions(&self) -> &'static [&'static str] {
17        &["c", "h"]
18    }
19    fn grammar_name(&self) -> &'static str {
20        "c"
21    }
22
23    fn has_symbols(&self) -> bool {
24        true
25    }
26
27    fn container_kinds(&self) -> &'static [&'static str] {
28        &[]
29    } // C doesn't have containers
30    fn function_kinds(&self) -> &'static [&'static str] {
31        &["function_definition"]
32    }
33    fn type_kinds(&self) -> &'static [&'static str] {
34        &["struct_specifier", "enum_specifier", "type_definition"]
35    }
36    fn import_kinds(&self) -> &'static [&'static str] {
37        &["preproc_include"]
38    }
39
40    fn public_symbol_kinds(&self) -> &'static [&'static str] {
41        &["function_definition"]
42    }
43
44    fn visibility_mechanism(&self) -> VisibilityMechanism {
45        VisibilityMechanism::HeaderBased
46    }
47    fn scope_creating_kinds(&self) -> &'static [&'static str] {
48        &["for_statement", "while_statement", "compound_statement"]
49    }
50
51    fn control_flow_kinds(&self) -> &'static [&'static str] {
52        &[
53            "if_statement",
54            "for_statement",
55            "while_statement",
56            "do_statement",
57            "switch_statement",
58            "return_statement",
59            "break_statement",
60            "continue_statement",
61            "goto_statement",
62        ]
63    }
64
65    fn complexity_nodes(&self) -> &'static [&'static str] {
66        &[
67            "if_statement",
68            "for_statement",
69            "while_statement",
70            "do_statement",
71            "switch_statement",
72            "case_statement",
73            "&&",
74            "||",
75            "conditional_expression",
76        ]
77    }
78
79    fn nesting_nodes(&self) -> &'static [&'static str] {
80        &[
81            "if_statement",
82            "for_statement",
83            "while_statement",
84            "do_statement",
85            "switch_statement",
86            "function_definition",
87        ]
88    }
89
90    fn signature_suffix(&self) -> &'static str {
91        " {}"
92    }
93
94    fn extract_function(&self, node: &Node, content: &str, _in_container: bool) -> Option<Symbol> {
95        let declarator = node.child_by_field_name("declarator")?;
96        let name = self.find_identifier(&declarator, content)?;
97
98        Some(Symbol {
99            name: name.to_string(),
100            kind: SymbolKind::Function,
101            signature: name.to_string(),
102            docstring: None,
103            attributes: Vec::new(),
104            start_line: node.start_position().row + 1,
105            end_line: node.end_position().row + 1,
106            visibility: Visibility::Public,
107            children: Vec::new(),
108            is_interface_impl: false,
109            implements: Vec::new(),
110        })
111    }
112
113    fn extract_container(&self, _node: &Node, _content: &str) -> Option<Symbol> {
114        None // C doesn't have containers in the same sense
115    }
116
117    fn extract_type(&self, node: &Node, content: &str) -> Option<Symbol> {
118        let name = self.node_name(node, content)?;
119        let kind = match node.kind() {
120            "struct_specifier" => SymbolKind::Struct,
121            "enum_specifier" => SymbolKind::Enum,
122            _ => SymbolKind::Type,
123        };
124
125        Some(Symbol {
126            name: name.to_string(),
127            kind,
128            signature: format!("{} {}", kind.as_str(), name),
129            docstring: None,
130            attributes: Vec::new(),
131            start_line: node.start_position().row + 1,
132            end_line: node.end_position().row + 1,
133            visibility: Visibility::Public,
134            children: Vec::new(),
135            is_interface_impl: false,
136            implements: Vec::new(),
137        })
138    }
139
140    fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
141        None
142    }
143
144    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
145        Vec::new()
146    }
147
148    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
149        if node.kind() != "preproc_include" {
150            return Vec::new();
151        }
152
153        let line = node.start_position().row + 1;
154        let mut cursor = node.walk();
155        for child in node.children(&mut cursor) {
156            if child.kind() == "string_literal" || child.kind() == "system_lib_string" {
157                let text = &content[child.byte_range()];
158                let module = text
159                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
160                    .to_string();
161                let is_relative = text.starts_with('"');
162                return vec![Import {
163                    module,
164                    names: Vec::new(),
165                    alias: None,
166                    is_wildcard: false,
167                    is_relative,
168                    line,
169                }];
170            }
171        }
172        Vec::new()
173    }
174
175    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
176        // C doesn't have multi-imports; each #include is a single header
177        if import.module.starts_with('<') || import.module.ends_with('>') {
178            format!("#include {}", import.module)
179        } else {
180            format!("#include \"{}\"", import.module)
181        }
182    }
183
184    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
185        if node.kind() != "function_definition" {
186            return Vec::new();
187        }
188
189        if let Some(name) = self.node_name(node, content) {
190            vec![Export {
191                name: name.to_string(),
192                kind: SymbolKind::Function,
193                line: node.start_position().row + 1,
194            }]
195        } else {
196            Vec::new()
197        }
198    }
199
200    fn is_public(&self, _node: &Node, _content: &str) -> bool {
201        true // C doesn't have visibility modifiers
202    }
203
204    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
205        Visibility::Public
206    }
207
208    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
209        let name = symbol.name.as_str();
210        match symbol.kind {
211            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
212            crate::SymbolKind::Module => name == "tests" || name == "test",
213            _ => false,
214        }
215    }
216
217    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
218        None
219    }
220
221    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
222        node.child_by_field_name("body")
223    }
224
225    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
226        false
227    }
228
229    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
230        // Try "name" field first
231        if let Some(name_node) = node.child_by_field_name("name") {
232            return Some(&content[name_node.byte_range()]);
233        }
234        // For functions, look in the declarator
235        if let Some(declarator) = node.child_by_field_name("declarator") {
236            return self.find_identifier(&declarator, content);
237        }
238        None
239    }
240
241    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
242        let ext = path.extension()?.to_str()?;
243        if !["c", "h"].contains(&ext) {
244            return None;
245        }
246        Some(path.to_string_lossy().to_string())
247    }
248
249    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
250        vec![module.to_string()]
251    }
252
253    fn is_stdlib_import(&self, include: &str, _project_root: &Path) -> bool {
254        // Standard C headers
255        let stdlib = [
256            "stdio.h", "stdlib.h", "string.h", "math.h", "time.h", "ctype.h", "errno.h", "float.h",
257            "limits.h", "locale.h", "setjmp.h", "signal.h", "stdarg.h", "stddef.h", "assert.h",
258        ];
259        stdlib.contains(&include)
260    }
261
262    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
263        None // C uses include paths, not a package cache
264    }
265
266    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
267        // Return the first include path as stdlib location
268        c_cpp::find_cpp_include_paths().into_iter().next()
269    }
270
271    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
272        use crate::{PackageSource, PackageSourceKind};
273        c_cpp::find_cpp_include_paths()
274            .into_iter()
275            .map(|path| PackageSource {
276                name: "includes",
277                path,
278                kind: PackageSourceKind::Recursive,
279                version_specific: false,
280            })
281            .collect()
282    }
283
284    fn package_module_name(&self, entry_name: &str) -> String {
285        entry_name.to_string()
286    }
287
288    fn discover_packages(&self, source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
289        self.discover_recursive_packages(&source.path, &source.path)
290    }
291
292    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
293        if path.is_file() {
294            Some(path.to_path_buf())
295        } else {
296            None
297        }
298    }
299
300    // === Import Resolution ===
301
302    fn lang_key(&self) -> &'static str {
303        "c"
304    }
305
306    fn resolve_local_import(
307        &self,
308        include: &str,
309        current_file: &Path,
310        _project_root: &Path,
311    ) -> Option<PathBuf> {
312        // Strip quotes if present
313        let header = include
314            .trim_start_matches('"')
315            .trim_end_matches('"')
316            .trim_start_matches('<')
317            .trim_end_matches('>');
318
319        let current_dir = current_file.parent()?;
320
321        // Try relative to current file's directory
322        let relative = current_dir.join(header);
323        if relative.is_file() {
324            return Some(relative);
325        }
326
327        // Try with common extensions if none specified
328        if !header.contains('.') {
329            for ext in &[".h", ".c"] {
330                let with_ext = current_dir.join(format!("{}{}", header, ext));
331                if with_ext.is_file() {
332                    return Some(with_ext);
333                }
334            }
335        }
336
337        None
338    }
339
340    fn resolve_external_import(
341        &self,
342        include: &str,
343        _project_root: &Path,
344    ) -> Option<ResolvedPackage> {
345        let include_paths = c_cpp::find_cpp_include_paths();
346        c_cpp::resolve_cpp_include(include, &include_paths)
347    }
348
349    fn get_version(&self, _project_root: &Path) -> Option<String> {
350        c_cpp::get_gcc_version()
351    }
352
353    fn indexable_extensions(&self) -> &'static [&'static str] {
354        &["c", "h"]
355    }
356
357    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
358        use crate::traits::{has_extension, skip_dotfiles};
359        if skip_dotfiles(name) {
360            return true;
361        }
362        !is_dir && !has_extension(name, self.indexable_extensions())
363    }
364}
365
366impl C {
367    fn find_identifier<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
368        if node.kind() == "identifier" {
369            return Some(&content[node.byte_range()]);
370        }
371        let mut cursor = node.walk();
372        for child in node.children(&mut cursor) {
373            if let Some(id) = self.find_identifier(&child, content) {
374                return Some(id);
375            }
376        }
377        None
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Node kinds that exist in the C grammar but are deliberately not used by
    /// any trait method of `C`.
    #[rustfmt::skip]
    const DOCUMENTED_UNUSED: &[&str] = &[
        // STRUCTURAL
        "bitfield_clause",         // : width
        "declaration",             // declaration
        "declaration_list",        // decl list
        "enumerator",              // enum value
        "enumerator_list",         // enum body
        "field_declaration",       // struct field
        "field_declaration_list",  // struct body
        "field_expression",        // foo.bar
        "field_identifier",        // field name
        "identifier",              // too common
        "linkage_specification",   // extern "C"
        "parameter_declaration",   // param decl
        "primitive_type",          // int, char
        "sized_type_specifier",    // unsigned int
        "statement_identifier",    // label name
        "storage_class_specifier", // static, extern
        "type_descriptor",         // type desc
        "type_identifier",         // type name
        "type_qualifier",          // const, volatile
        "union_specifier",         // union

        // CLAUSE
        "else_clause",             // else

        // EXPRESSION
        "alignof_expression",      // alignof(T)
        "assignment_expression",   // x = y
        "binary_expression",       // a + b
        "call_expression",         // foo()
        "cast_expression",         // (T)x
        "comma_expression",        // a, b
        "compound_literal_expression", // (T){...}
        "extension_expression",    // __extension__
        "generic_expression",      // _Generic
        "gnu_asm_expression",      // asm()
        "offsetof_expression",     // offsetof
        "parenthesized_expression",// (expr)
        "pointer_expression",      // *p, &x
        "sizeof_expression",       // sizeof(T)
        "subscript_expression",    // arr[i]
        "unary_expression",        // -x, !x
        "update_expression",       // x++

        // FUNCTION
        "abstract_function_declarator", // abstract func
        "function_declarator",     // func decl

        // PREPROCESSOR
        "preproc_elif",            // #elif
        "preproc_elifdef",         // #elifdef
        "preproc_else",            // #else
        "preproc_function_def",    // function macro
        "preproc_if",              // #if
        "preproc_ifdef",           // #ifdef

        // OTHER
        "alignas_qualifier",       // alignas
        "attribute_declaration",   // [[attr]]
        "attribute_specifier",     // __attribute__
        "attributed_statement",    // stmt with attr
        "expression_statement",    // expr;
        "gnu_asm_qualifier",       // asm qualifiers
        "labeled_statement",       // label:
        "macro_type_specifier",    // macro type

        // MS EXTENSIONS
        "ms_based_modifier",       // __based
        "ms_call_modifier",        // __cdecl
        "ms_declspec_modifier",    // __declspec
        "ms_pointer_modifier",     // __ptr32
        "ms_restrict_modifier",    // __restrict
        "ms_signed_ptr_modifier",  // __sptr
        "ms_unaligned_ptr_modifier", // __unaligned
        "ms_unsigned_ptr_modifier", // __uptr

        // SEH
        "seh_except_clause",       // __except
        "seh_finally_clause",      // __finally
        "seh_leave_statement",     // __leave
        "seh_try_statement",       // __try
    ];

    /// Checks the documented list of unused C node kinds against the audit helper.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    #[test]
    fn unused_node_kinds_audit() {
        let outcome = validate_unused_kinds_audit(&C, DOCUMENTED_UNUSED);
        outcome.expect("C unused node kinds audit failed");
    }
}