semantic/parser/
parser_core.rs1use tree_sitter::{Node, Parser, Tree as TSTree};
5
6use super::{
7 parser_language::Language,
8 parser_types::{FunctionDef, Import, ImportKind},
9};
10
11#[derive(Debug)]
13pub struct ParsedFile {
14 pub language: Language,
15 pub source: String,
16 tree: TSTree,
17}
18
19impl ParsedFile {
20 pub fn parse(source: impl Into<String>, language: Language) -> Option<Self> {
22 let source = source.into();
23 let lang = language.parser()?;
24
25 let mut parser = Parser::new();
26 parser.set_language(&lang).ok()?;
27 let tree = parser.parse(&source, None)?;
28
29 if tree.root_node().has_error() {
30 return None;
31 }
32
33 Some(Self {
34 language,
35 source,
36 tree,
37 })
38 }
39
40 pub fn root_node(&self) -> Node<'_> {
42 self.tree.root_node()
43 }
44
45 pub fn extract_functions(&self) -> Vec<FunctionDef> {
47 let mut functions = Vec::new();
48 let mut stack = vec![self.root_node()];
49
50 while let Some(node) = stack.pop() {
51 if Self::is_function_node(&node, self.language)
52 && let Some(name) = self.get_function_name(&node)
53 {
54 functions.push(FunctionDef {
55 name: name.to_string(),
56 signature: self.get_function_signature(&node),
57 start_line: node.start_position().row,
58 end_line: node.end_position().row,
59 content: self.source[node.byte_range()].to_string(),
60 });
61 }
62
63 push_children_reverse(node, &mut stack);
64 }
65
66 functions
67 }
68
69 pub fn extract_imports(&self) -> Vec<Import> {
71 match self.language {
72 Language::Rust => self.extract_rust_imports(),
73 Language::Python => self.extract_imports_by_kind(
74 &["import_statement", "import_from_statement"],
75 ImportKind::Import,
76 ),
77 Language::JavaScript | Language::TypeScript => {
78 self.extract_imports_by_kind(&["import_statement"], ImportKind::Import)
79 }
80 Language::Go | Language::Java => {
81 self.extract_imports_by_kind(&["import_declaration"], ImportKind::Import)
82 }
83 _ => Vec::new(),
84 }
85 }
86
87 pub fn is_function_kind(kind: &str, language: Language) -> bool {
89 match language {
90 Language::Rust => {
91 kind == "function_item"
92 || kind == "method_declaration"
93 || kind == "closure_expression"
94 }
95 Language::Python => kind == "function_definition",
96 Language::JavaScript | Language::TypeScript => {
97 kind == "function_declaration"
98 || kind == "method_definition"
99 || kind == "generator_function_declaration"
100 || kind == "variable_declarator"
101 }
102 Language::Go => kind == "function_declaration" || kind == "method_declaration",
103 Language::C | Language::Cpp => kind == "function_definition",
104 Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
105 _ => false,
106 }
107 }
108
109 fn is_function_node(node: &Node<'_>, language: Language) -> bool {
110 match language {
111 Language::Rust => {
112 node.kind() == "function_item"
113 || node.kind() == "method_declaration"
114 || node.kind() == "closure_expression"
115 }
116 Language::Python => node.kind() == "function_definition",
117 Language::JavaScript | Language::TypeScript => {
118 node.kind() == "function_declaration"
119 || node.kind() == "method_definition"
120 || node.kind() == "generator_function_declaration"
121 || (node.kind() == "variable_declarator"
122 && node
123 .child_by_field_name("value")
124 .is_some_and(|value| is_javascript_function_value(value.kind())))
125 }
126 Language::Go => {
127 node.kind() == "function_declaration" || node.kind() == "method_declaration"
128 }
129 Language::C | Language::Cpp => node.kind() == "function_definition",
130 Language::Java => {
131 node.kind() == "method_declaration" || node.kind() == "constructor_declaration"
132 }
133 _ => false,
134 }
135 }
136
137 fn get_function_name(&self, node: &Node<'_>) -> Option<&str> {
138 if let Some(name) = node.child_by_field_name("name") {
139 return Some(&self.source[name.byte_range()]);
140 }
141 if let Some(declarator) = node.child_by_field_name("declarator")
142 && let Some(name) = self.find_identifier_in_subtree(declarator)
143 {
144 return Some(name);
145 }
146
147 for i in 0..node.child_count() {
148 if let Some(child) = node.child(i as u32)
149 && matches!(
150 child.kind(),
151 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
152 )
153 {
154 return Some(&self.source[child.byte_range()]);
155 }
156 }
157 None
158 }
159
160 fn find_identifier_in_subtree(&self, node: Node<'_>) -> Option<&str> {
161 let mut stack = vec![node];
162 while let Some(current) = stack.pop() {
163 if matches!(
164 current.kind(),
165 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
166 ) {
167 return Some(&self.source[current.byte_range()]);
168 }
169 push_children_reverse(current, &mut stack);
170 }
171 None
172 }
173
174 fn get_function_signature(&self, node: &Node<'_>) -> String {
175 if node.kind() == "variable_declarator" {
176 return self.get_variable_function_signature(node);
177 }
178
179 let mut signature_parts = Vec::new();
180
181 for i in 0..node.child_count() {
182 if let Some(child) = node.child(i as u32) {
183 let kind = child.kind();
184 if matches!(
185 kind,
186 "identifier"
187 | "field_identifier"
188 | "type_identifier"
189 | "property_identifier"
190 | "parameters"
191 | "formal_parameters"
192 | "parameter_list"
193 | "function_declarator"
194 | "type_parameters"
195 | "type_arguments"
196 | "return_type"
197 | "type_annotation"
198 | "result"
199 ) {
200 signature_parts.push(&self.source[child.byte_range()]);
201 }
202 if matches!(
203 kind,
204 "block" | "compound_statement" | "statement_block" | "suite"
205 ) {
206 break;
207 }
208 }
209 }
210
211 signature_parts.join(" ")
212 }
213
214 fn get_variable_function_signature(&self, node: &Node<'_>) -> String {
215 let Some(name) = node.child_by_field_name("name") else {
216 return String::new();
217 };
218 let Some(value) = node.child_by_field_name("value") else {
219 return self.source[name.byte_range()].to_string();
220 };
221
222 let mut signature_parts = vec![&self.source[name.byte_range()]];
223 for i in 0..value.child_count() {
224 if let Some(child) = value.child(i as u32) {
225 if matches!(child.kind(), "formal_parameters" | "parameters") {
226 signature_parts.push(&self.source[child.byte_range()]);
227 }
228 if matches!(child.kind(), "statement_block" | "body") {
229 break;
230 }
231 }
232 }
233 signature_parts.join(" ")
234 }
235
236 fn extract_rust_imports(&self) -> Vec<Import> {
237 let mut imports = Vec::new();
238 let root = self.root_node();
239
240 for i in 0..root.child_count() {
241 if let Some(child) = root.child(i as u32) {
242 if child.kind() == "use_declaration" {
243 let text = &self.source[child.byte_range()];
244 imports.push(Import {
245 raw: text.to_string(),
246 kind: ImportKind::Use,
247 });
248 } else if child.kind() == "extern_crate_declaration" {
249 let text = &self.source[child.byte_range()];
250 imports.push(Import {
251 raw: text.to_string(),
252 kind: ImportKind::ExternCrate,
253 });
254 }
255 }
256 }
257
258 imports
259 }
260
261 fn extract_imports_by_kind(&self, kinds: &[&str], kind: ImportKind) -> Vec<Import> {
262 let mut imports = Vec::new();
263 let root = self.root_node();
264
265 for i in 0..root.child_count() {
266 if let Some(child) = root.child(i as u32)
267 && kinds.contains(&child.kind())
268 {
269 let text = &self.source[child.byte_range()];
270 imports.push(Import {
271 raw: text.to_string(),
272 kind: kind.clone(),
273 });
274 }
275 }
276
277 imports
278 }
279}
280
/// Reports whether `kind` names a JavaScript/TypeScript expression node that is a function
/// value: an arrow function, a function expression, or a generator function.
fn is_javascript_function_value(kind: &str) -> bool {
    const FUNCTION_VALUE_KINDS: [&str; 3] =
        ["arrow_function", "function_expression", "generator_function"];

    FUNCTION_VALUE_KINDS.contains(&kind)
}
287
288fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
289 let child_count = node.child_count();
290 for index in (0..child_count).rev() {
291 if let Some(child) = node.child(index as u32) {
292 stack.push(child);
293 }
294 }
295}