Skip to main content

semantic/parser/
parser_core.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Core parsing implementation.
3
4use tree_sitter::{Node, Parser, Tree as TSTree};
5
6use super::{
7    parser_language::Language,
8    parser_types::{FunctionDef, Import, ImportKind},
9};
10
/// A source file together with the tree-sitter syntax tree parsed from it.
#[derive(Debug)]
pub struct ParsedFile {
    /// Language the source was parsed as; selects the node kinds used during extraction.
    pub language: Language,
    /// Full source text. The extraction methods slice it with node byte ranges from `tree`.
    pub source: String,
    /// Syntax tree produced for `source`; exposed to callers only through `root_node`.
    tree: TSTree,
}
18
19impl ParsedFile {
20    /// Parse a file's contents.
21    pub fn parse(source: impl Into<String>, language: Language) -> Option<Self> {
22        let source = source.into();
23        let lang = language.parser()?;
24
25        let mut parser = Parser::new();
26        parser.set_language(&lang).ok()?;
27        let tree = parser.parse(&source, None)?;
28
29        if tree.root_node().has_error() {
30            return None;
31        }
32
33        Some(Self {
34            language,
35            source,
36            tree,
37        })
38    }
39
40    /// Get the root node of the AST.
41    pub fn root_node(&self) -> Node<'_> {
42        self.tree.root_node()
43    }
44
45    /// Extract function definitions from the file.
46    pub fn extract_functions(&self) -> Vec<FunctionDef> {
47        let mut functions = Vec::new();
48        let mut stack = vec![self.root_node()];
49
50        while let Some(node) = stack.pop() {
51            if Self::is_function_node(&node, self.language)
52                && let Some(name) = self.get_function_name(&node)
53            {
54                functions.push(FunctionDef {
55                    name: name.to_string(),
56                    signature: self.get_function_signature(&node),
57                    start_line: node.start_position().row,
58                    end_line: node.end_position().row,
59                    content: self.source[node.byte_range()].to_string(),
60                });
61            }
62
63            push_children_reverse(node, &mut stack);
64        }
65
66        functions
67    }
68
69    /// Extract imports from the file.
70    pub fn extract_imports(&self) -> Vec<Import> {
71        match self.language {
72            Language::Rust => self.extract_rust_imports(),
73            Language::Python => self.extract_imports_by_kind(
74                &["import_statement", "import_from_statement"],
75                ImportKind::Import,
76            ),
77            Language::JavaScript | Language::TypeScript => {
78                self.extract_imports_by_kind(&["import_statement"], ImportKind::Import)
79            }
80            Language::Go | Language::Java => {
81                self.extract_imports_by_kind(&["import_declaration"], ImportKind::Import)
82            }
83            _ => Vec::new(),
84        }
85    }
86
87    /// Check if a node kind string represents a function definition in the given language.
88    pub fn is_function_kind(kind: &str, language: Language) -> bool {
89        match language {
90            Language::Rust => {
91                kind == "function_item"
92                    || kind == "method_declaration"
93                    || kind == "closure_expression"
94            }
95            Language::Python => kind == "function_definition",
96            Language::JavaScript | Language::TypeScript => {
97                kind == "function_declaration"
98                    || kind == "method_definition"
99                    || kind == "generator_function_declaration"
100                    || kind == "variable_declarator"
101            }
102            Language::Go => kind == "function_declaration" || kind == "method_declaration",
103            Language::C | Language::Cpp => kind == "function_definition",
104            Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
105            _ => false,
106        }
107    }
108
109    fn is_function_node(node: &Node<'_>, language: Language) -> bool {
110        match language {
111            Language::Rust => {
112                node.kind() == "function_item"
113                    || node.kind() == "method_declaration"
114                    || node.kind() == "closure_expression"
115            }
116            Language::Python => node.kind() == "function_definition",
117            Language::JavaScript | Language::TypeScript => {
118                node.kind() == "function_declaration"
119                    || node.kind() == "method_definition"
120                    || node.kind() == "generator_function_declaration"
121                    || (node.kind() == "variable_declarator"
122                        && node
123                            .child_by_field_name("value")
124                            .is_some_and(|value| is_javascript_function_value(value.kind())))
125            }
126            Language::Go => {
127                node.kind() == "function_declaration" || node.kind() == "method_declaration"
128            }
129            Language::C | Language::Cpp => node.kind() == "function_definition",
130            Language::Java => {
131                node.kind() == "method_declaration" || node.kind() == "constructor_declaration"
132            }
133            _ => false,
134        }
135    }
136
137    fn get_function_name(&self, node: &Node<'_>) -> Option<&str> {
138        if let Some(name) = node.child_by_field_name("name") {
139            return Some(&self.source[name.byte_range()]);
140        }
141        if let Some(declarator) = node.child_by_field_name("declarator")
142            && let Some(name) = self.find_identifier_in_subtree(declarator)
143        {
144            return Some(name);
145        }
146
147        for i in 0..node.child_count() {
148            if let Some(child) = node.child(i as u32)
149                && matches!(
150                    child.kind(),
151                    "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
152                )
153            {
154                return Some(&self.source[child.byte_range()]);
155            }
156        }
157        None
158    }
159
160    fn find_identifier_in_subtree(&self, node: Node<'_>) -> Option<&str> {
161        let mut stack = vec![node];
162        while let Some(current) = stack.pop() {
163            if matches!(
164                current.kind(),
165                "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
166            ) {
167                return Some(&self.source[current.byte_range()]);
168            }
169            push_children_reverse(current, &mut stack);
170        }
171        None
172    }
173
174    fn get_function_signature(&self, node: &Node<'_>) -> String {
175        if node.kind() == "variable_declarator" {
176            return self.get_variable_function_signature(node);
177        }
178
179        let mut signature_parts = Vec::new();
180
181        for i in 0..node.child_count() {
182            if let Some(child) = node.child(i as u32) {
183                let kind = child.kind();
184                if matches!(
185                    kind,
186                    "identifier"
187                        | "field_identifier"
188                        | "type_identifier"
189                        | "property_identifier"
190                        | "parameters"
191                        | "formal_parameters"
192                        | "parameter_list"
193                        | "function_declarator"
194                        | "type_parameters"
195                        | "type_arguments"
196                        | "return_type"
197                        | "type_annotation"
198                        | "result"
199                ) {
200                    signature_parts.push(&self.source[child.byte_range()]);
201                }
202                if matches!(
203                    kind,
204                    "block" | "compound_statement" | "statement_block" | "suite"
205                ) {
206                    break;
207                }
208            }
209        }
210
211        signature_parts.join(" ")
212    }
213
214    fn get_variable_function_signature(&self, node: &Node<'_>) -> String {
215        let Some(name) = node.child_by_field_name("name") else {
216            return String::new();
217        };
218        let Some(value) = node.child_by_field_name("value") else {
219            return self.source[name.byte_range()].to_string();
220        };
221
222        let mut signature_parts = vec![&self.source[name.byte_range()]];
223        for i in 0..value.child_count() {
224            if let Some(child) = value.child(i as u32) {
225                if matches!(child.kind(), "formal_parameters" | "parameters") {
226                    signature_parts.push(&self.source[child.byte_range()]);
227                }
228                if matches!(child.kind(), "statement_block" | "body") {
229                    break;
230                }
231            }
232        }
233        signature_parts.join(" ")
234    }
235
236    fn extract_rust_imports(&self) -> Vec<Import> {
237        let mut imports = Vec::new();
238        let root = self.root_node();
239
240        for i in 0..root.child_count() {
241            if let Some(child) = root.child(i as u32) {
242                if child.kind() == "use_declaration" {
243                    let text = &self.source[child.byte_range()];
244                    imports.push(Import {
245                        raw: text.to_string(),
246                        kind: ImportKind::Use,
247                    });
248                } else if child.kind() == "extern_crate_declaration" {
249                    let text = &self.source[child.byte_range()];
250                    imports.push(Import {
251                        raw: text.to_string(),
252                        kind: ImportKind::ExternCrate,
253                    });
254                }
255            }
256        }
257
258        imports
259    }
260
261    fn extract_imports_by_kind(&self, kinds: &[&str], kind: ImportKind) -> Vec<Import> {
262        let mut imports = Vec::new();
263        let root = self.root_node();
264
265        for i in 0..root.child_count() {
266            if let Some(child) = root.child(i as u32)
267                && kinds.contains(&child.kind())
268            {
269                let text = &self.source[child.byte_range()];
270                imports.push(Import {
271                    raw: text.to_string(),
272                    kind: kind.clone(),
273                });
274            }
275        }
276
277        imports
278    }
279}
280
/// Report whether `kind` is a node kind for a function-valued expression,
/// i.e. one that can appear as the value of a variable declarator.
fn is_javascript_function_value(kind: &str) -> bool {
    const FUNCTION_VALUE_KINDS: [&str; 3] =
        ["arrow_function", "function_expression", "generator_function"];

    FUNCTION_VALUE_KINDS.contains(&kind)
}
287
288fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
289    let child_count = node.child_count();
290    for index in (0..child_count).rev() {
291        if let Some(child) = node.child(index as u32) {
292            stack.push(child);
293        }
294    }
295}