Skip to main content

normalize_languages/
c.rs

1//! C language support.
2
3use crate::{Import, Language, LanguageSymbols};
4use tree_sitter::Node;
5
/// C language support.
///
/// Stateless marker type; all behaviour lives in its [`Language`] and
/// [`LanguageSymbols`] implementations.
#[derive(Debug, Clone, Copy)]
pub struct C;
8
9impl Language for C {
10    fn name(&self) -> &'static str {
11        "C"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["c", "h"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "c"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn signature_suffix(&self) -> &'static str {
25        " {}"
26    }
27
28    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
29        let mut prev = node.prev_sibling();
30        while let Some(sibling) = prev {
31            if sibling.kind() == "comment" {
32                let text = &content[sibling.byte_range()];
33                if text.starts_with("/**") {
34                    return Some(clean_block_doc_comment(text));
35                }
36                return None;
37            }
38            // Skip other non-comment nodes (e.g. preprocessor directives)
39            if sibling.kind() != "preproc_def" && sibling.kind() != "preproc_ifdef" {
40                return None;
41            }
42            prev = sibling.prev_sibling();
43        }
44        None
45    }
46
47    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
48        let mut attrs = Vec::new();
49        let mut cursor = node.walk();
50        for child in node.children(&mut cursor) {
51            match child.kind() {
52                "attribute_declaration" | "attribute_specifier" | "ms_declspec_modifier" => {
53                    attrs.push(content[child.byte_range()].trim().to_string());
54                }
55                _ => {}
56            }
57        }
58        attrs
59    }
60
61    fn build_signature(&self, node: &Node, content: &str) -> String {
62        match node.kind() {
63            "function_definition" => {
64                if let Some(declarator) = node.child_by_field_name("declarator")
65                    && let Some(name) = C::find_identifier(&declarator, content)
66                {
67                    return name.to_string();
68                }
69                let text = &content[node.byte_range()];
70                text.lines().next().unwrap_or(text).trim().to_string()
71            }
72            "struct_specifier" | "enum_specifier" => {
73                let name = self.node_name(node, content).unwrap_or("");
74                let keyword = if node.kind() == "struct_specifier" {
75                    "struct"
76                } else {
77                    "enum"
78                };
79                format!("{} {}", keyword, name)
80            }
81            _ => {
82                let text = &content[node.byte_range()];
83                text.lines().next().unwrap_or(text).trim().to_string()
84            }
85        }
86    }
87
88    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
89        if node.kind() != "preproc_include" {
90            return Vec::new();
91        }
92
93        let line = node.start_position().row + 1;
94        let mut cursor = node.walk();
95        for child in node.children(&mut cursor) {
96            if child.kind() == "string_literal" || child.kind() == "system_lib_string" {
97                let text = &content[child.byte_range()];
98                let module = text
99                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
100                    .to_string();
101                let is_relative = text.starts_with('"');
102                return vec![Import {
103                    module,
104                    names: Vec::new(),
105                    alias: None,
106                    is_wildcard: false,
107                    is_relative,
108                    line,
109                }];
110            }
111        }
112        Vec::new()
113    }
114
115    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
116        // C doesn't have multi-imports; each #include is a single header
117        if import.module.starts_with('<') || import.module.ends_with('>') {
118            format!("#include {}", import.module)
119        } else {
120            format!("#include \"{}\"", import.module)
121        }
122    }
123
124    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
125        let name = symbol.name.as_str();
126        match symbol.kind {
127            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
128            crate::SymbolKind::Module => name == "tests" || name == "test",
129            _ => false,
130        }
131    }
132
133    fn test_file_globs(&self) -> &'static [&'static str] {
134        &["**/test_*.c", "**/*_test.c", "**/tests/**/*.c"]
135    }
136
137    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
138        node.child_by_field_name("body")
139    }
140
141    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
142        // Try "name" field first
143        if let Some(name_node) = node.child_by_field_name("name") {
144            return Some(&content[name_node.byte_range()]);
145        }
146        // For functions, look in the declarator
147        if let Some(declarator) = node.child_by_field_name("declarator") {
148            return Self::find_identifier(&declarator, content);
149        }
150        None
151    }
152}
153
154impl LanguageSymbols for C {}
155
156impl C {
157    fn find_identifier<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
158        if node.kind() == "identifier" {
159            return Some(&content[node.byte_range()]);
160        }
161        let mut cursor = node.walk();
162        for child in node.children(&mut cursor) {
163            if let Some(id) = Self::find_identifier(&child, content) {
164                return Some(id);
165            }
166        }
167        None
168    }
169}
170
/// Flattens a `/** ... */` block comment into a single line of text.
///
/// The opening `/**` and closing `*/` are removed when present (each independently, so
/// an unterminated comment still loses its opener). Every line is then trimmed, one
/// leading `*` is dropped, the line is trimmed again, empty lines are discarded and the
/// remainder is joined with single spaces.
fn clean_block_doc_comment(text: &str) -> String {
    // `/**/` is an empty comment whose opener and closer share the middle `*`; stripping
    // the two delimiters one after the other would leave a stray `/` behind.
    if text == "/**/" {
        return String::new();
    }

    let body = text.strip_prefix("/**").unwrap_or(text);
    // Fall back to `body`, not `text`, so a missing closer does not restore the opener.
    let body = body.strip_suffix("*/").unwrap_or(body);

    body.lines()
        .map(|line| {
            let line = line.trim();
            line.strip_prefix('*').unwrap_or(line).trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
182
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Documents node kinds that exist in the C grammar but aren't used in trait methods.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    ///
    /// The list is handed to `validate_unused_kinds_audit`, and the test panics if that
    /// check returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            // STRUCTURAL
            "bitfield_clause",         // : width
            "declaration_list",        // decl list
            "enumerator",              // enum value
            "enumerator_list",         // enum body
            "field_declaration",       // struct field
            "field_declaration_list",  // struct body
            "field_expression",        // foo.bar
            "field_identifier",        // field name
            "identifier",              // too common
            "linkage_specification",   // extern "C"
            "parameter_declaration",   // param decl
            "primitive_type",          // int, char
            "sized_type_specifier",    // unsigned int
            "statement_identifier",    // label name
            "storage_class_specifier", // static, extern
            "type_descriptor",         // type desc
            "type_identifier",         // type name
            "type_qualifier",          // const, volatile
            "union_specifier",         // union

            // CLAUSE
            "else_clause",             // else

            // EXPRESSION
            "alignof_expression",      // alignof(T)
            "assignment_expression",   // x = y
            "binary_expression",       // a + b
            "call_expression",         // foo()
            "cast_expression",         // (T)x
            "comma_expression",        // a, b
            "compound_literal_expression", // (T){...}
            "extension_expression",    // __extension__
            "generic_expression",      // _Generic
            "gnu_asm_expression",      // asm()
            "offsetof_expression",     // offsetof
            "parenthesized_expression",// (expr)
            "pointer_expression",      // *p, &x
            "sizeof_expression",       // sizeof(T)
            "subscript_expression",    // arr[i]
            "unary_expression",        // -x, !x
            "update_expression",       // x++

            // FUNCTION
            "abstract_function_declarator", // abstract func decl

            // PREPROCESSOR
            "preproc_elif",            // #elif
            "preproc_elifdef",         // #elifdef
            "preproc_else",            // #else
            "preproc_function_def",    // function macro
            "preproc_if",              // #if
            "preproc_ifdef",           // #ifdef — stepped over by extract_docstring

            // OTHER
            "alignas_qualifier",       // alignas
            "attribute_declaration",   // [[attr]] — used by extract_attributes (not detectable by audit)
            "attribute_specifier",     // __attribute__ — used by extract_attributes
            "attributed_statement",    // stmt with attr
            "expression_statement",    // expr;
            "gnu_asm_qualifier",       // asm qualifiers
            "labeled_statement",       // label:
            "macro_type_specifier",    // macro type

            // MS EXTENSIONS
            "ms_based_modifier",       // __based
            "ms_call_modifier",        // __cdecl
            "ms_declspec_modifier",    // __declspec — used by extract_attributes
            "ms_pointer_modifier",     // __ptr32
            "ms_restrict_modifier",    // __restrict
            "ms_signed_ptr_modifier",  // __sptr
            "ms_unaligned_ptr_modifier", // __unaligned
            "ms_unsigned_ptr_modifier", // __uptr

            // SEH
            "seh_except_clause",       // __except
            "seh_finally_clause",      // __finally
            "seh_leave_statement",     // __leave
            "seh_try_statement",       // __try

            // covered by tags.scm
            "function_definition",
            "if_statement",
            "conditional_expression",
            "case_statement",
            "continue_statement",
            "for_statement",
            "while_statement",
            "return_statement",
            "break_statement",
            "switch_statement",
            "compound_statement",
            "do_statement",
            "goto_statement",
        ];

        validate_unused_kinds_audit(&C, documented_unused)
            .expect("C unused node kinds audit failed");
    }
}