semantic/parser/
parser_core.rs1use tree_sitter::{Node, Parser, Tree as TSTree};
5
6use super::{
7 parser_language::Language,
8 parser_types::{FunctionDef, Import, ImportKind},
9};
10
/// A source file together with the tree-sitter syntax tree built from it.
#[derive(Debug)]
pub struct ParsedFile {
    /// Language the source was parsed as.
    pub language: Language,
    /// Full source text; node byte ranges taken from `tree` are used to slice this string.
    pub source: String,
    /// Syntax tree produced by the tree-sitter parser. Private; reached through `root_node`.
    tree: TSTree,
}
18
19impl ParsedFile {
20 pub fn parse(source: impl Into<String>, language: Language) -> Option<Self> {
22 let source = source.into();
23 let lang = language.parser()?;
24
25 let mut parser = Parser::new();
26 parser.set_language(&lang).ok()?;
27 let tree = parser.parse(&source, None)?;
28
29 if tree.root_node().has_error() {
30 return None;
31 }
32
33 Some(Self {
34 language,
35 source,
36 tree,
37 })
38 }
39
40 pub fn root_node(&self) -> Node<'_> {
42 self.tree.root_node()
43 }
44
45 pub fn extract_functions(&self) -> Vec<FunctionDef> {
47 let mut functions = Vec::new();
48 let mut stack = vec![self.root_node()];
49
50 while let Some(node) = stack.pop() {
51 if Self::is_function_node(&node, self.language)
52 && let Some(name) = self.get_function_name(&node)
53 {
54 functions.push(FunctionDef {
55 name: name.to_string(),
56 signature: self.get_function_signature(&node),
57 start_line: node.start_position().row,
58 end_line: node.end_position().row,
59 content: self.source[node.byte_range()].to_string(),
60 });
61 }
62
63 push_children_reverse(node, &mut stack);
64 }
65
66 functions
67 }
68
69 pub fn extract_imports(&self) -> Vec<Import> {
71 match self.language {
72 Language::Rust => self.extract_rust_imports(),
73 Language::Python => self.extract_imports_by_kind(
74 &["import_statement", "import_from_statement"],
75 ImportKind::Import,
76 ),
77 Language::JavaScript | Language::TypeScript => {
78 self.extract_imports_by_kind(&["import_statement"], ImportKind::Import)
79 }
80 Language::Go | Language::Java => {
81 self.extract_imports_by_kind(&["import_declaration"], ImportKind::Import)
82 }
83 _ => Vec::new(),
84 }
85 }
86
87 pub fn is_function_kind(kind: &str, language: Language) -> bool {
89 match language {
90 Language::Rust => {
91 kind == "function_item"
92 || kind == "method_declaration"
93 || kind == "closure_expression"
94 }
95 Language::Python => kind == "function_definition",
96 Language::JavaScript | Language::TypeScript => {
97 kind == "function_declaration"
98 || kind == "method_definition"
99 || kind == "generator_function_declaration"
100 || kind == "variable_declarator"
101 }
102 Language::Go => kind == "function_declaration" || kind == "method_declaration",
103 Language::C | Language::Cpp => kind == "function_definition",
104 Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
105 _ => false,
106 }
107 }
108
109 fn is_function_node(node: &Node<'_>, language: Language) -> bool {
110 match language {
111 Language::Rust => {
112 node.kind() == "function_item"
113 || node.kind() == "method_declaration"
114 || node.kind() == "closure_expression"
115 }
116 Language::Python => node.kind() == "function_definition",
117 Language::JavaScript | Language::TypeScript => {
118 node.kind() == "function_declaration"
119 || node.kind() == "method_definition"
120 || node.kind() == "generator_function_declaration"
121 || (node.kind() == "variable_declarator"
122 && node
123 .child_by_field_name("value")
124 .is_some_and(|value| is_javascript_function_value(value.kind())))
125 }
126 Language::Go => {
127 node.kind() == "function_declaration" || node.kind() == "method_declaration"
128 }
129 Language::C | Language::Cpp => node.kind() == "function_definition",
130 Language::Java => {
131 node.kind() == "method_declaration" || node.kind() == "constructor_declaration"
132 }
133 _ => false,
134 }
135 }
136
137 fn get_function_name(&self, node: &Node<'_>) -> Option<&str> {
138 if let Some(name) = node.child_by_field_name("name") {
139 return Some(&self.source[name.byte_range()]);
140 }
141 if let Some(declarator) = node.child_by_field_name("declarator") {
142 if let Some(name) = self.c_function_name(declarator) {
143 return Some(name);
144 }
145 if let Some(name) = self.find_identifier_in_subtree(declarator) {
146 return Some(name);
147 }
148 }
149
150 for i in 0..node.child_count() {
151 if let Some(child) = node.child(i as u32)
152 && matches!(
153 child.kind(),
154 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
155 )
156 {
157 return Some(&self.source[child.byte_range()]);
158 }
159 }
160 None
161 }
162
163 fn c_function_name(&self, function_declarator: Node<'_>) -> Option<&str> {
179 let mut current = function_declarator.child_by_field_name("declarator")?;
180 for _ in 0..32 {
182 match current.kind() {
183 "identifier"
184 | "field_identifier"
185 | "type_identifier"
186 | "property_identifier"
187 | "operator_name"
188 | "destructor_name" => {
189 return Some(&self.source[current.byte_range()]);
190 }
191 "qualified_identifier" | "template_function" => {
192 current = current.child_by_field_name("name")?;
193 }
194 "pointer_declarator"
195 | "reference_declarator"
196 | "function_declarator"
197 | "parenthesized_declarator" => {
198 current = current.child_by_field_name("declarator")?;
199 }
200 _ => return None,
201 }
202 }
203 None
204 }
205
206 fn find_identifier_in_subtree(&self, node: Node<'_>) -> Option<&str> {
207 let mut stack = vec![node];
208 while let Some(current) = stack.pop() {
209 if matches!(
210 current.kind(),
211 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
212 ) {
213 return Some(&self.source[current.byte_range()]);
214 }
215 push_children_reverse(current, &mut stack);
216 }
217 None
218 }
219
220 fn get_function_signature(&self, node: &Node<'_>) -> String {
221 if node.kind() == "variable_declarator" {
222 return self.get_variable_function_signature(node);
223 }
224
225 let mut signature_parts = Vec::new();
226
227 for i in 0..node.child_count() {
228 if let Some(child) = node.child(i as u32) {
229 let kind = child.kind();
230 if matches!(
231 kind,
232 "identifier"
233 | "field_identifier"
234 | "type_identifier"
235 | "property_identifier"
236 | "parameters"
237 | "formal_parameters"
238 | "parameter_list"
239 | "function_declarator"
240 | "type_parameters"
241 | "type_arguments"
242 | "return_type"
243 | "type_annotation"
244 | "result"
245 ) {
246 signature_parts.push(&self.source[child.byte_range()]);
247 }
248 if matches!(
249 kind,
250 "block" | "compound_statement" | "statement_block" | "suite"
251 ) {
252 break;
253 }
254 }
255 }
256
257 signature_parts.join(" ")
258 }
259
260 fn get_variable_function_signature(&self, node: &Node<'_>) -> String {
261 let Some(name) = node.child_by_field_name("name") else {
262 return String::new();
263 };
264 let Some(value) = node.child_by_field_name("value") else {
265 return self.source[name.byte_range()].to_string();
266 };
267
268 let mut signature_parts = vec![&self.source[name.byte_range()]];
269 for i in 0..value.child_count() {
270 if let Some(child) = value.child(i as u32) {
271 if matches!(child.kind(), "formal_parameters" | "parameters") {
272 signature_parts.push(&self.source[child.byte_range()]);
273 }
274 if matches!(child.kind(), "statement_block" | "body") {
275 break;
276 }
277 }
278 }
279 signature_parts.join(" ")
280 }
281
282 fn extract_rust_imports(&self) -> Vec<Import> {
283 let mut imports = Vec::new();
284 let root = self.root_node();
285
286 for i in 0..root.child_count() {
287 if let Some(child) = root.child(i as u32) {
288 if child.kind() == "use_declaration" {
289 let text = &self.source[child.byte_range()];
290 imports.push(Import {
291 raw: text.to_string(),
292 kind: ImportKind::Use,
293 });
294 } else if child.kind() == "extern_crate_declaration" {
295 let text = &self.source[child.byte_range()];
296 imports.push(Import {
297 raw: text.to_string(),
298 kind: ImportKind::ExternCrate,
299 });
300 }
301 }
302 }
303
304 imports
305 }
306
307 fn extract_imports_by_kind(&self, kinds: &[&str], kind: ImportKind) -> Vec<Import> {
308 let mut imports = Vec::new();
309 let root = self.root_node();
310
311 for i in 0..root.child_count() {
312 if let Some(child) = root.child(i as u32)
313 && kinds.contains(&child.kind())
314 {
315 let text = &self.source[child.byte_range()];
316 imports.push(Import {
317 raw: text.to_string(),
318 kind: kind.clone(),
319 });
320 }
321 }
322
323 imports
324 }
325}
326
/// Reports whether `kind` names a JavaScript expression node that is itself a
/// function: an arrow function, a function expression or a generator function.
fn is_javascript_function_value(kind: &str) -> bool {
    const FUNCTION_VALUE_KINDS: [&str; 3] =
        ["arrow_function", "function_expression", "generator_function"];

    FUNCTION_VALUE_KINDS.contains(&kind)
}
333
334fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
335 let child_count = node.child_count();
336 for index in (0..child_count).rev() {
337 if let Some(child) = node.child(index as u32) {
338 stack.push(child);
339 }
340 }
341}