Skip to main content

semantic/parser/
parser_core.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Core parsing implementation.
3
4use tree_sitter::{Node, Parser, Tree as TSTree};
5
6use super::{
7    parser_language::Language,
8    parser_types::{FunctionDef, Import, ImportKind},
9};
10
11/// A parsed file with its AST.
12#[derive(Debug)]
13pub struct ParsedFile {
14    pub language: Language,
15    pub source: String,
16    tree: TSTree,
17}
18
19impl ParsedFile {
20    /// Parse a file's contents.
21    pub fn parse(source: impl Into<String>, language: Language) -> Option<Self> {
22        let source = source.into();
23        let lang = language.parser()?;
24
25        let mut parser = Parser::new();
26        parser.set_language(&lang).ok()?;
27        let tree = parser.parse(&source, None)?;
28
29        if tree.root_node().has_error() {
30            return None;
31        }
32
33        Some(Self {
34            language,
35            source,
36            tree,
37        })
38    }
39
40    /// Get the root node of the AST.
41    pub fn root_node(&self) -> Node<'_> {
42        self.tree.root_node()
43    }
44
45    /// Extract function definitions from the file.
46    pub fn extract_functions(&self) -> Vec<FunctionDef> {
47        let mut functions = Vec::new();
48        let mut stack = vec![self.root_node()];
49
50        while let Some(node) = stack.pop() {
51            if Self::is_function_node(&node, self.language)
52                && let Some(name) = self.get_function_name(&node)
53            {
54                functions.push(FunctionDef {
55                    name: name.to_string(),
56                    signature: self.get_function_signature(&node),
57                    start_line: node.start_position().row,
58                    end_line: node.end_position().row,
59                    content: self.source[node.byte_range()].to_string(),
60                });
61            }
62
63            push_children_reverse(node, &mut stack);
64        }
65
66        functions
67    }
68
69    /// Extract imports from the file.
70    pub fn extract_imports(&self) -> Vec<Import> {
71        match self.language {
72            Language::Rust => self.extract_rust_imports(),
73            Language::Python => self.extract_imports_by_kind(
74                &["import_statement", "import_from_statement"],
75                ImportKind::Import,
76            ),
77            Language::JavaScript | Language::TypeScript => {
78                self.extract_imports_by_kind(&["import_statement"], ImportKind::Import)
79            }
80            Language::Go | Language::Java => {
81                self.extract_imports_by_kind(&["import_declaration"], ImportKind::Import)
82            }
83            _ => Vec::new(),
84        }
85    }
86
87    /// Check if a node kind string represents a function definition in the given language.
88    pub fn is_function_kind(kind: &str, language: Language) -> bool {
89        match language {
90            Language::Rust => {
91                kind == "function_item"
92                    || kind == "method_declaration"
93                    || kind == "closure_expression"
94            }
95            Language::Python => kind == "function_definition",
96            Language::JavaScript | Language::TypeScript => {
97                kind == "function_declaration"
98                    || kind == "method_definition"
99                    || kind == "generator_function_declaration"
100                    || kind == "variable_declarator"
101            }
102            Language::Go => kind == "function_declaration" || kind == "method_declaration",
103            Language::C | Language::Cpp => kind == "function_definition",
104            Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
105            _ => false,
106        }
107    }
108
109    fn is_function_node(node: &Node<'_>, language: Language) -> bool {
110        match language {
111            Language::Rust => {
112                node.kind() == "function_item"
113                    || node.kind() == "method_declaration"
114                    || node.kind() == "closure_expression"
115            }
116            Language::Python => node.kind() == "function_definition",
117            Language::JavaScript | Language::TypeScript => {
118                node.kind() == "function_declaration"
119                    || node.kind() == "method_definition"
120                    || node.kind() == "generator_function_declaration"
121                    || (node.kind() == "variable_declarator"
122                        && node
123                            .child_by_field_name("value")
124                            .is_some_and(|value| is_javascript_function_value(value.kind())))
125            }
126            Language::Go => {
127                node.kind() == "function_declaration" || node.kind() == "method_declaration"
128            }
129            Language::C | Language::Cpp => node.kind() == "function_definition",
130            Language::Java => {
131                node.kind() == "method_declaration" || node.kind() == "constructor_declaration"
132            }
133            _ => false,
134        }
135    }
136
137    fn get_function_name(&self, node: &Node<'_>) -> Option<&str> {
138        if let Some(name) = node.child_by_field_name("name") {
139            return Some(&self.source[name.byte_range()]);
140        }
141        if let Some(declarator) = node.child_by_field_name("declarator") {
142            if let Some(name) = self.c_function_name(declarator) {
143                return Some(name);
144            }
145            if let Some(name) = self.find_identifier_in_subtree(declarator) {
146                return Some(name);
147            }
148        }
149
150        for i in 0..node.child_count() {
151            if let Some(child) = node.child(i as u32)
152                && matches!(
153                    child.kind(),
154                    "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
155                )
156            {
157                return Some(&self.source[child.byte_range()]);
158            }
159        }
160        None
161    }
162
163    /// Resolve the actual function name from a C/C++ declarator.
164    ///
165    /// Mirrors `merge_driver::items::c_function_name` (heddle#114
166    /// commit `dc37af8`, Codex r5 P1 #2). A plain DFS over the
167    /// declarator subtree returns the FIRST identifier-ish leaf — for
168    /// a templated qualified name like `void Foo<U>::bar()` the
169    /// scope's inner `type_identifier` ("Foo") wins, so every method
170    /// on the same scope collapses to name="Foo". Instead, walk the
171    /// declarator's `declarator` field, peel wrapper layers, and
172    /// recurse into `qualified_identifier` / `template_function`'s
173    /// `name` field so the scope's identifier never wins.
174    ///
175    /// Duplicated rather than lifted to a shared module: the function
176    /// is short, and `parser_core` vs `merge_driver` are different
177    /// concerns. Lift if a third caller appears.
178    fn c_function_name(&self, function_declarator: Node<'_>) -> Option<&str> {
179        let mut current = function_declarator.child_by_field_name("declarator")?;
180        // Cap traversal so a pathological wrapper chain doesn't loop.
181        for _ in 0..32 {
182            match current.kind() {
183                "identifier"
184                | "field_identifier"
185                | "type_identifier"
186                | "property_identifier"
187                | "operator_name"
188                | "destructor_name" => {
189                    return Some(&self.source[current.byte_range()]);
190                }
191                "qualified_identifier" | "template_function" => {
192                    current = current.child_by_field_name("name")?;
193                }
194                "pointer_declarator"
195                | "reference_declarator"
196                | "function_declarator"
197                | "parenthesized_declarator" => {
198                    current = current.child_by_field_name("declarator")?;
199                }
200                _ => return None,
201            }
202        }
203        None
204    }
205
206    fn find_identifier_in_subtree(&self, node: Node<'_>) -> Option<&str> {
207        let mut stack = vec![node];
208        while let Some(current) = stack.pop() {
209            if matches!(
210                current.kind(),
211                "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
212            ) {
213                return Some(&self.source[current.byte_range()]);
214            }
215            push_children_reverse(current, &mut stack);
216        }
217        None
218    }
219
220    fn get_function_signature(&self, node: &Node<'_>) -> String {
221        if node.kind() == "variable_declarator" {
222            return self.get_variable_function_signature(node);
223        }
224
225        let mut signature_parts = Vec::new();
226
227        for i in 0..node.child_count() {
228            if let Some(child) = node.child(i as u32) {
229                let kind = child.kind();
230                if matches!(
231                    kind,
232                    "identifier"
233                        | "field_identifier"
234                        | "type_identifier"
235                        | "property_identifier"
236                        | "parameters"
237                        | "formal_parameters"
238                        | "parameter_list"
239                        | "function_declarator"
240                        | "type_parameters"
241                        | "type_arguments"
242                        | "return_type"
243                        | "type_annotation"
244                        | "result"
245                ) {
246                    signature_parts.push(&self.source[child.byte_range()]);
247                }
248                if matches!(
249                    kind,
250                    "block" | "compound_statement" | "statement_block" | "suite"
251                ) {
252                    break;
253                }
254            }
255        }
256
257        signature_parts.join(" ")
258    }
259
260    fn get_variable_function_signature(&self, node: &Node<'_>) -> String {
261        let Some(name) = node.child_by_field_name("name") else {
262            return String::new();
263        };
264        let Some(value) = node.child_by_field_name("value") else {
265            return self.source[name.byte_range()].to_string();
266        };
267
268        let mut signature_parts = vec![&self.source[name.byte_range()]];
269        for i in 0..value.child_count() {
270            if let Some(child) = value.child(i as u32) {
271                if matches!(child.kind(), "formal_parameters" | "parameters") {
272                    signature_parts.push(&self.source[child.byte_range()]);
273                }
274                if matches!(child.kind(), "statement_block" | "body") {
275                    break;
276                }
277            }
278        }
279        signature_parts.join(" ")
280    }
281
282    fn extract_rust_imports(&self) -> Vec<Import> {
283        let mut imports = Vec::new();
284        let root = self.root_node();
285
286        for i in 0..root.child_count() {
287            if let Some(child) = root.child(i as u32) {
288                if child.kind() == "use_declaration" {
289                    let text = &self.source[child.byte_range()];
290                    imports.push(Import {
291                        raw: text.to_string(),
292                        kind: ImportKind::Use,
293                    });
294                } else if child.kind() == "extern_crate_declaration" {
295                    let text = &self.source[child.byte_range()];
296                    imports.push(Import {
297                        raw: text.to_string(),
298                        kind: ImportKind::ExternCrate,
299                    });
300                }
301            }
302        }
303
304        imports
305    }
306
307    fn extract_imports_by_kind(&self, kinds: &[&str], kind: ImportKind) -> Vec<Import> {
308        let mut imports = Vec::new();
309        let root = self.root_node();
310
311        for i in 0..root.child_count() {
312            if let Some(child) = root.child(i as u32)
313                && kinds.contains(&child.kind())
314            {
315                let text = &self.source[child.byte_range()];
316                imports.push(Import {
317                    raw: text.to_string(),
318                    kind: kind.clone(),
319                });
320            }
321        }
322
323        imports
324    }
325}
326
/// Report whether `kind` is the node kind of an expression that defines a
/// function when used as a variable's value: an arrow function, a function
/// expression or a generator function.
fn is_javascript_function_value(kind: &str) -> bool {
    const FUNCTION_VALUE_KINDS: [&str; 3] =
        ["arrow_function", "function_expression", "generator_function"];
    FUNCTION_VALUE_KINDS.contains(&kind)
}
333
334fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
335    let child_count = node.child_count();
336    for index in (0..child_count).rev() {
337        if let Some(child) = node.child(index as u32) {
338            stack.push(child);
339        }
340    }
341}