normalize-languages 0.3.2

Tree-sitter language support and dynamic grammar loading
Documentation
//! C language support.

use crate::{Import, Language, LanguageSymbols};
use tree_sitter::Node;

/// C language support.
///
/// Zero-sized marker type; all behavior lives in its [`Language`] and
/// [`LanguageSymbols`] implementations.
pub struct C;

impl Language for C {
    /// Display name of the language.
    fn name(&self) -> &'static str {
        "C"
    }

    /// File extensions associated with C: `c` and `h`.
    fn extensions(&self) -> &'static [&'static str] {
        &["c", "h"]
    }

    /// Identifier of the grammar used for this language (`"c"`).
    fn grammar_name(&self) -> &'static str {
        "c"
    }

    /// Exposes this type through its [`LanguageSymbols`] implementation.
    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
        Some(self)
    }

    /// Text placed after a signature; `" {}"` stands in for the elided body.
    fn signature_suffix(&self) -> &'static str {
        " {}"
    }

    /// Returns the cleaned `/** ... */` doc comment that precedes `node`, if any.
    ///
    /// Walks backwards over previous siblings. Only `preproc_def` and
    /// `preproc_ifdef` nodes are skipped; any other non-comment sibling ends
    /// the search. The first comment reached decides the outcome: it is used
    /// when it starts with `/**`, otherwise there is no docstring.
    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
        let mut prev = node.prev_sibling();
        while let Some(sibling) = prev {
            match sibling.kind() {
                "comment" => {
                    let text = &content[sibling.byte_range()];
                    // The nearest comment is final: a plain `/* */` or `//`
                    // comment means "no docstring", we do not look further up.
                    return text
                        .starts_with("/**")
                        .then(|| clean_block_doc_comment(text));
                }
                // These preprocessor nodes may sit between the doc comment and
                // the documented item, so keep walking past them.
                "preproc_def" | "preproc_ifdef" => prev = sibling.prev_sibling(),
                _ => return None,
            }
        }
        None
    }

    /// Collects the source text of attribute-like direct children of `node`:
    /// `[[...]]` declarations, `__attribute__` specifiers and `__declspec`
    /// modifiers. Each entry is trimmed of surrounding whitespace.
    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
        let mut attrs = Vec::new();
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if matches!(
                child.kind(),
                "attribute_declaration" | "attribute_specifier" | "ms_declspec_modifier"
            ) {
                attrs.push(content[child.byte_range()].trim().to_string());
            }
        }
        attrs
    }

    /// Builds a short signature string for `node`.
    ///
    /// * `function_definition`: the first identifier found in the declarator,
    ///   falling back to the node's first source line.
    /// * `struct_specifier` / `enum_specifier`: `"struct <name>"` or
    ///   `"enum <name>"` (the name is empty when none is found).
    /// * anything else: the node's first source line, trimmed.
    fn build_signature(&self, node: &Node, content: &str) -> String {
        // Shared fallback: first line of the node's text, trimmed.
        let first_line = || {
            let text = &content[node.byte_range()];
            text.lines().next().unwrap_or(text).trim().to_string()
        };

        match node.kind() {
            "function_definition" => node
                .child_by_field_name("declarator")
                .and_then(|declarator| C::find_identifier(&declarator, content))
                .map(|name| name.to_string())
                .unwrap_or_else(first_line),
            kind @ ("struct_specifier" | "enum_specifier") => {
                let keyword = if kind == "struct_specifier" {
                    "struct"
                } else {
                    "enum"
                };
                let name = self.node_name(node, content).unwrap_or("");
                format!("{} {}", keyword, name)
            }
            _ => first_line(),
        }
    }

    /// Extracts the header named by a `preproc_include` node.
    ///
    /// Returns an empty vector for any other node kind, or when the include
    /// has no `string_literal` / `system_lib_string` child. Otherwise returns
    /// a single [`Import`] whose `module` has the surrounding quotes or angle
    /// brackets stripped, whose `is_relative` is `true` for the quoted form,
    /// and whose `line` is the node's start row plus one.
    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
        if node.kind() != "preproc_include" {
            return Vec::new();
        }

        let line = node.start_position().row + 1;
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if matches!(child.kind(), "string_literal" | "system_lib_string") {
                let text = &content[child.byte_range()];
                // Record the delimiter style before it is stripped; it is the
                // only place the quoted-vs-angle distinction survives.
                let is_relative = text.starts_with('"');
                let module = text
                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
                    .to_string();
                return vec![Import {
                    module,
                    names: Vec::new(),
                    alias: None,
                    is_wildcard: false,
                    is_relative,
                    line,
                }];
            }
        }
        Vec::new()
    }

    /// Renders `import` as an `#include` directive.
    ///
    /// C has no multi-imports, so `_names` is ignored. A module that already
    /// carries angle brackets is emitted verbatim. Otherwise the delimiter
    /// follows `is_relative`: quotes for relative includes, angle brackets for
    /// the rest. This mirrors how `extract_imports` strips the delimiters and
    /// records the style in `is_relative`, so an extracted `<stdio.h>` is not
    /// rewritten as `"stdio.h"`.
    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
        if import.module.starts_with('<') || import.module.ends_with('>') {
            format!("#include {}", import.module)
        } else if import.is_relative {
            format!("#include \"{}\"", import.module)
        } else {
            format!("#include <{}>", import.module)
        }
    }

    /// Name-based test detection: functions and methods prefixed with
    /// `test_`, and modules named exactly `tests` or `test`.
    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
        let name = symbol.name.as_str();
        match symbol.kind {
            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
            crate::SymbolKind::Module => name == "tests" || name == "test",
            _ => false,
        }
    }

    /// Glob patterns for C test files.
    fn test_file_globs(&self) -> &'static [&'static str] {
        &["**/test_*.c", "**/*_test.c", "**/tests/**/*.c"]
    }

    /// Returns the child stored in the `body` field of `node`, if present.
    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
        node.child_by_field_name("body")
    }

    /// Resolves the name of `node` as a slice of `content`.
    ///
    /// Prefers the `name` field. Failing that, searches the `declarator`
    /// field for the first identifier (the path taken for functions).
    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
        if let Some(name_node) = node.child_by_field_name("name") {
            return Some(&content[name_node.byte_range()]);
        }
        node.child_by_field_name("declarator")
            .and_then(|declarator| Self::find_identifier(&declarator, content))
    }
}

// Empty impl: nothing is overridden here, so C uses `LanguageSymbols`' provided methods.
impl LanguageSymbols for C {}

impl C {
    /// Depth-first, pre-order search for the first `identifier` node at or
    /// below `node`, returning its text as a slice of `content`.
    fn find_identifier<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
        match node.kind() {
            "identifier" => Some(&content[node.byte_range()]),
            _ => {
                let mut cursor = node.walk();
                let mut children = node.children(&mut cursor);
                // First child subtree that yields an identifier wins.
                children.find_map(|child| Self::find_identifier(&child, content))
            }
        }
    }
}

/// Flattens a `/** ... */` block comment into a single line of text.
///
/// The comment delimiters are removed, each line is trimmed and has one
/// leading `*` dropped, blank lines are discarded, and the remaining lines are
/// joined with single spaces.
///
/// Malformed input is tolerated: an unterminated comment still loses its
/// opening delimiter, and the degenerate `/**/` (where the delimiters overlap)
/// yields an empty string.
fn clean_block_doc_comment(text: &str) -> String {
    // Strip the closing delimiter first so that `/**/` is reduced to `/*`
    // rather than leaving a stray `/` behind.
    let body = text.strip_suffix("*/").unwrap_or(text);
    // Each fallback uses the intermediate value, never the original `text`,
    // so a failed strip cannot undo an earlier successful one.
    let body = body
        .strip_prefix("/**")
        .or_else(|| body.strip_prefix("/*"))
        .unwrap_or(body);

    body.lines()
        .map(|line| {
            let line = line.trim();
            line.strip_prefix('*').unwrap_or(line).trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Documents node kinds that exist in the C grammar but aren't used in trait methods.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    ///
    /// The list is passed to `validate_unused_kinds_audit`, and the test fails if
    /// that call returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        // `rustfmt::skip` keeps the hand-aligned trailing comments intact.
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            // STRUCTURAL
            "bitfield_clause",         // : width
            "declaration_list",        // decl list
            "enumerator",              // enum value
            "enumerator_list",         // enum body
            "field_declaration",       // struct field
            "field_declaration_list",  // struct body
            "field_expression",        // foo.bar
            "field_identifier",        // field name
            "identifier",              // too common
            "linkage_specification",   // extern "C"
            "parameter_declaration",   // param decl
            "primitive_type",          // int, char
            "sized_type_specifier",    // unsigned int
            "statement_identifier",    // label name
            "storage_class_specifier", // static, extern
            "type_descriptor",         // type desc
            "type_identifier",         // type name
            "type_qualifier",          // const, volatile
            "union_specifier",         // union

            // CLAUSE
            "else_clause",             // else

            // EXPRESSION
            "alignof_expression",      // alignof(T)
            "assignment_expression",   // x = y
            "binary_expression",       // a + b
            "call_expression",         // foo()
            "cast_expression",         // (T)x
            "comma_expression",        // a, b
            "compound_literal_expression", // (T){...}
            "extension_expression",    // __extension__
            "generic_expression",      // _Generic
            "gnu_asm_expression",      // asm()
            "offsetof_expression",     // offsetof
            "parenthesized_expression",// (expr)
            "pointer_expression",      // *p, &x
            "sizeof_expression",       // sizeof(T)
            "subscript_expression",    // arr[i]
            "unary_expression",        // -x, !x
            "update_expression",       // x++

            // FUNCTION
            "abstract_function_declarator", // abstract func decl

            // PREPROCESSOR
            "preproc_elif",            // #elif
            "preproc_elifdef",         // #elifdef
            "preproc_else",            // #else
            "preproc_function_def",    // function macro
            "preproc_if",              // #if
            "preproc_ifdef",           // #ifdef

            // OTHER
            "alignas_qualifier",       // alignas
            "attribute_declaration",   // [[attr]] — used by extract_attributes (not detectable by audit)
            "attribute_specifier",     // __attribute__ — used by extract_attributes
            "attributed_statement",    // stmt with attr
            "expression_statement",    // expr;
            "gnu_asm_qualifier",       // asm qualifiers
            "labeled_statement",       // label:
            "macro_type_specifier",    // macro type

            // MS EXTENSIONS
            "ms_based_modifier",       // __based
            "ms_call_modifier",        // __cdecl
            "ms_declspec_modifier",    // __declspec — used by extract_attributes
            "ms_pointer_modifier",     // __ptr32
            "ms_restrict_modifier",    // __restrict
            "ms_signed_ptr_modifier",  // __sptr
            "ms_unaligned_ptr_modifier", // __unaligned
            "ms_unsigned_ptr_modifier", // __uptr

            // SEH
            "seh_except_clause",       // __except
            "seh_finally_clause",      // __finally
            "seh_leave_statement",     // __leave
            "seh_try_statement",       // __try

            // COVERED BY tags.scm
            "function_definition",
            "if_statement",
            "conditional_expression",
            "case_statement",
            "continue_statement",
            "for_statement",
            "while_statement",
            "return_statement",
            "break_statement",
            "switch_statement",
            "compound_statement",
            "do_statement",
            "goto_statement",
        ];

        validate_unused_kinds_audit(&C, documented_unused)
            .expect("C unused node kinds audit failed");
    }
}