cadi_core/atomizer/languages/
c.rs

1//! C-specific atomizer
2
3use crate::atomizer::{AtomizerConfig, ExtractedAtom, AtomKind};
4use crate::error::CadiResult;
5
/// C-specific atomizer with Tree-sitter support.
///
/// When the `ast-parsing` feature is enabled, `extract` parses the source with
/// the Tree-sitter C grammar; otherwise it delegates to the generic
/// `AtomExtractor` configured for the `"c"` language.
pub struct CAtomizer {
    /// Atomizer configuration. Only the fallback (non-`ast-parsing`) extraction
    /// path reads it, where it is cloned into the generic `AtomExtractor`.
    _config: AtomizerConfig,
}
10
11impl CAtomizer {
12    pub fn new(config: AtomizerConfig) -> Self {
13        Self { _config: config }
14    }
15
16    /// Extract atoms using Tree-sitter (when feature enabled)
17    #[cfg(feature = "ast-parsing")]
18    pub fn extract(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
19        use tree_sitter::{Parser, Query, QueryCursor};
20        
21        let mut parser = Parser::new();
22        parser.set_language(&tree_sitter_c::language())?;
23        
24        let tree = parser.parse(source, None)
25            .ok_or_else(|| crate::error::CadiError::AtomizerError("Parse failed".into()))?;
26        
27        let mut atoms = Vec::new();
28        
29        // Tree-sitter queries for C
30        let query_src = r#"
31            (function_definition
32                declarator: (function_declarator
33                    declarator: (identifier) @fn_name
34                )
35            ) @function
36            
37            (struct_specifier
38                name: (type_identifier) @struct_name
39            ) @struct
40            
41            (enum_specifier
42                name: (type_identifier) @enum_name
43            ) @enum
44            
45            (type_definition
46                declarator: (type_identifier) @typedef_name
47            ) @typedef
48        "#;
49        
50        let query = Query::new(&tree_sitter_c::language(), query_src)?;
51        let mut cursor = QueryCursor::new();
52        
53        let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
54        
55        for m in matches {
56            let mut name = "unknown".to_string();
57            let mut kind = AtomKind::Function;
58            let mut atom_node = None;
59
60            for capture in m.captures {
61                let capture_name = query.capture_names()[capture.index as usize];
62                match capture_name {
63                    "fn_name" | "struct_name" | "enum_name" | "typedef_name" => {
64                        name = capture.node.utf8_text(source.as_bytes()).unwrap_or("unknown").to_string();
65                    }
66                    "function" => {
67                        kind = AtomKind::Function;
68                        atom_node = Some(capture.node);
69                    }
70                    "struct" => {
71                        kind = AtomKind::Struct;
72                        atom_node = Some(capture.node);
73                    }
74                    "enum" => {
75                        kind = AtomKind::Enum;
76                        atom_node = Some(capture.node);
77                    }
78                    "typedef" => {
79                        kind = AtomKind::TypeAlias;
80                        atom_node = Some(capture.node);
81                    }
82                    _ => {}
83                }
84            }
85
86            if let Some(node) = atom_node {
87                let start_byte = node.start_byte();
88                let end_byte = node.end_byte();
89                let start_point = node.start_position();
90                let end_point = node.end_position();
91
92                atoms.push(ExtractedAtom {
93                    name,
94                    kind,
95                    source: source[start_byte..end_byte].to_string(),
96                    start_byte,
97                    end_byte,
98                    start_line: start_point.row + 1,
99                    end_line: end_point.row + 1,
100                    defines: vec![], // Will be filled by extractor/resolver
101                    references: Vec::new(),
102                    doc_comment: None,
103                    visibility: crate::atomizer::extractor::Visibility::Public,
104                    parent: None,
105                    decorators: Vec::new(),
106                });
107            }
108        }
109        
110        Ok(atoms)
111    }
112    
113    /// Fallback extraction without Tree-sitter
114    #[cfg(not(feature = "ast-parsing"))]
115    pub fn extract(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
116        use crate::atomizer::AtomExtractor;
117        AtomExtractor::new("c", self._config.clone()).extract(source)
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use crate::atomizer::AtomizerConfig;

    /// A snippet with one struct and one function must yield an atom named
    /// after each of them.
    #[test]
    fn test_c_extraction() {
        let c_snippet = r#"
            struct Point {
                int x;
                int y;
            };

            int add(int a, int b) {
                return a + b;
            }
        "#;

        let extracted = CAtomizer::new(AtomizerConfig::default())
            .extract(c_snippet)
            .unwrap();

        // True when some extracted atom carries the requested name.
        let has_atom = |wanted: &str| extracted.iter().any(|atom| atom.name == wanted);

        // Should find struct Point and function add
        assert!(has_atom("Point"));
        assert!(has_atom("add"));
    }
}