// cadi_core/atomizer/languages/c.rs
use crate::atomizer::{AtomizerConfig, ExtractedAtom, AtomKind};
use crate::error::CadiResult;

6pub struct CAtomizer {
8 _config: AtomizerConfig,
9}
10
11impl CAtomizer {
12 pub fn new(config: AtomizerConfig) -> Self {
13 Self { _config: config }
14 }
15
16 #[cfg(feature = "ast-parsing")]
18 pub fn extract(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
19 use tree_sitter::{Parser, Query, QueryCursor};
20
21 let mut parser = Parser::new();
22 parser.set_language(&tree_sitter_c::language())?;
23
24 let tree = parser.parse(source, None)
25 .ok_or_else(|| crate::error::CadiError::AtomizerError("Parse failed".into()))?;
26
27 let mut atoms = Vec::new();
28
29 let query_src = r#"
31 (function_definition
32 declarator: (function_declarator
33 declarator: (identifier) @fn_name
34 )
35 ) @function
36
37 (struct_specifier
38 name: (type_identifier) @struct_name
39 ) @struct
40
41 (enum_specifier
42 name: (type_identifier) @enum_name
43 ) @enum
44
45 (type_definition
46 declarator: (type_identifier) @typedef_name
47 ) @typedef
48 "#;
49
50 let query = Query::new(&tree_sitter_c::language(), query_src)?;
51 let mut cursor = QueryCursor::new();
52
53 let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
54
55 for m in matches {
56 let mut name = "unknown".to_string();
57 let mut kind = AtomKind::Function;
58 let mut atom_node = None;
59
60 for capture in m.captures {
61 let capture_name = query.capture_names()[capture.index as usize];
62 match capture_name {
63 "fn_name" | "struct_name" | "enum_name" | "typedef_name" => {
64 name = capture.node.utf8_text(source.as_bytes()).unwrap_or("unknown").to_string();
65 }
66 "function" => {
67 kind = AtomKind::Function;
68 atom_node = Some(capture.node);
69 }
70 "struct" => {
71 kind = AtomKind::Struct;
72 atom_node = Some(capture.node);
73 }
74 "enum" => {
75 kind = AtomKind::Enum;
76 atom_node = Some(capture.node);
77 }
78 "typedef" => {
79 kind = AtomKind::TypeAlias;
80 atom_node = Some(capture.node);
81 }
82 _ => {}
83 }
84 }
85
86 if let Some(node) = atom_node {
87 let start_byte = node.start_byte();
88 let end_byte = node.end_byte();
89 let start_point = node.start_position();
90 let end_point = node.end_position();
91
92 atoms.push(ExtractedAtom {
93 name,
94 kind,
95 source: source[start_byte..end_byte].to_string(),
96 start_byte,
97 end_byte,
98 start_line: start_point.row + 1,
99 end_line: end_point.row + 1,
100 defines: vec![], references: Vec::new(),
102 doc_comment: None,
103 visibility: crate::atomizer::extractor::Visibility::Public,
104 parent: None,
105 decorators: Vec::new(),
106 });
107 }
108 }
109
110 Ok(atoms)
111 }
112
113 #[cfg(not(feature = "ast-parsing"))]
115 pub fn extract(&self, source: &str) -> CadiResult<Vec<ExtractedAtom>> {
116 use crate::atomizer::AtomExtractor;
117 AtomExtractor::new("c", self._config.clone()).extract(source)
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use crate::atomizer::AtomizerConfig;

    /// A struct definition and a function definition in the same translation
    /// unit must both be reported, identified by name.
    #[test]
    fn test_c_extraction() {
        // The C fixture is kept at column 0 so that it does not depend on how
        // the active extractor treats leading indentation.
        let source = r#"
struct Point {
    int x;
    int y;
};

int add(int a, int b) {
    return a + b;
}
"#;

        let atomizer = CAtomizer::new(AtomizerConfig::default());
        let atoms = atomizer.extract(source).unwrap();

        assert!(
            atoms.iter().any(|a| a.name == "Point"),
            "struct `Point` was not extracted"
        );
        assert!(
            atoms.iter().any(|a| a.name == "add"),
            "function `add` was not extracted"
        );
    }
}