Skip to main content

semantic/parser/
syntax_index.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Compact syntax index derived from a tree-sitter parse tree.
3
4use std::ops::Range;
5
6use tree_sitter::Node;
7
8use super::{
9    parser_language::Language,
10    parser_types::{FunctionDef, Import, ImportKind},
11};
12
13/// Compact Heddle-owned syntax data for one parsed source file.
14#[derive(Debug)]
15pub struct SyntaxIndex {
16    functions: Vec<IndexedFunction>,
17    imports: Vec<IndexedImport>,
18    line_offsets: Vec<usize>,
19}
20
21/// Borrowed view of a function indexed in a [`SyntaxIndex`].
22#[derive(Clone, Copy, Debug)]
23pub struct FunctionRef<'a> {
24    inner: &'a IndexedFunction,
25    source: &'a str,
26}
27
28/// Borrowed view of an import indexed in a [`SyntaxIndex`].
29#[derive(Clone, Copy, Debug)]
30pub struct ImportRef<'a> {
31    inner: &'a IndexedImport,
32    source: &'a str,
33}
34
35#[derive(Debug)]
36struct IndexedFunction {
37    name: String,
38    signature: String,
39    start_line: usize,
40    end_line: usize,
41    content: Range<usize>,
42}
43
44#[derive(Debug)]
45struct IndexedImport {
46    raw: Range<usize>,
47    kind: ImportKind,
48}
49
50impl SyntaxIndex {
51    pub(super) fn build(language: Language, source: &str, root: Node<'_>) -> Self {
52        let mut index = Self {
53            functions: Vec::new(),
54            imports: Vec::new(),
55            line_offsets: line_offsets(source),
56        };
57
58        let mut stack = vec![root];
59        while let Some(node) = stack.pop() {
60            if is_function_node(&node, language)
61                && let Some(name) = function_name(&node, source)
62            {
63                index.functions.push(IndexedFunction {
64                    name: name.to_string(),
65                    signature: function_signature(&node, source),
66                    start_line: node.start_position().row,
67                    end_line: node.end_position().row,
68                    content: node.byte_range(),
69                });
70            }
71
72            push_children_reverse(node, &mut stack);
73        }
74
75        let mut cursor = root.walk();
76        for child in root.children(&mut cursor) {
77            match language {
78                Language::Rust => match child.kind() {
79                    "use_declaration" => index.imports.push(IndexedImport {
80                        raw: child.byte_range(),
81                        kind: ImportKind::Use,
82                    }),
83                    "extern_crate_declaration" => index.imports.push(IndexedImport {
84                        raw: child.byte_range(),
85                        kind: ImportKind::ExternCrate,
86                    }),
87                    _ => {}
88                },
89                Language::Python => {
90                    if matches!(child.kind(), "import_statement" | "import_from_statement") {
91                        index.imports.push(IndexedImport {
92                            raw: child.byte_range(),
93                            kind: ImportKind::Import,
94                        });
95                    }
96                }
97                Language::JavaScript | Language::TypeScript => {
98                    if child.kind() == "import_statement" {
99                        index.imports.push(IndexedImport {
100                            raw: child.byte_range(),
101                            kind: ImportKind::Import,
102                        });
103                    }
104                }
105                Language::Go | Language::Java => {
106                    if child.kind() == "import_declaration" {
107                        index.imports.push(IndexedImport {
108                            raw: child.byte_range(),
109                            kind: ImportKind::Import,
110                        });
111                    }
112                }
113                Language::C | Language::Cpp | Language::Unknown => {}
114            }
115        }
116
117        index
118    }
119
120    pub fn functions<'a>(&'a self, source: &'a str) -> impl Iterator<Item = FunctionRef<'a>> + 'a {
121        self.functions
122            .iter()
123            .map(move |inner| FunctionRef { inner, source })
124    }
125
126    pub fn imports<'a>(&'a self, source: &'a str) -> impl Iterator<Item = ImportRef<'a>> + 'a {
127        self.imports
128            .iter()
129            .map(move |inner| ImportRef { inner, source })
130    }
131
132    /// Byte offsets where each line starts. The first entry is always `0`.
133    pub fn line_offsets(&self) -> &[usize] {
134        &self.line_offsets
135    }
136}
137
138impl FunctionRef<'_> {
139    pub fn name(&self) -> &str {
140        &self.inner.name
141    }
142
143    pub fn signature(&self) -> &str {
144        &self.inner.signature
145    }
146
147    pub fn start_line(&self) -> usize {
148        self.inner.start_line
149    }
150
151    pub fn end_line(&self) -> usize {
152        self.inner.end_line
153    }
154
155    pub fn content(&self) -> &str {
156        &self.source[self.inner.content.clone()]
157    }
158
159    pub fn to_owned(self) -> FunctionDef {
160        FunctionDef {
161            name: self.name().to_string(),
162            signature: self.signature().to_string(),
163            start_line: self.start_line(),
164            end_line: self.end_line(),
165            content: self.content().to_string(),
166        }
167    }
168}
169
170impl ImportRef<'_> {
171    pub fn raw(&self) -> &str {
172        &self.source[self.inner.raw.clone()]
173    }
174
175    pub fn kind(&self) -> ImportKind {
176        self.inner.kind
177    }
178
179    pub fn to_owned(self) -> Import {
180        Import {
181            raw: self.raw().to_string(),
182            kind: self.kind(),
183        }
184    }
185}
186
187pub(super) fn is_function_kind(kind: &str, language: Language) -> bool {
188    match language {
189        Language::Rust => {
190            kind == "function_item" || kind == "method_declaration" || kind == "closure_expression"
191        }
192        Language::Python => kind == "function_definition",
193        Language::JavaScript | Language::TypeScript => {
194            kind == "function_declaration"
195                || kind == "method_definition"
196                || kind == "generator_function_declaration"
197                || kind == "variable_declarator"
198        }
199        Language::Go => kind == "function_declaration" || kind == "method_declaration",
200        Language::C | Language::Cpp => kind == "function_definition",
201        Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
202        Language::Unknown => false,
203    }
204}
205
206fn is_function_node(node: &Node<'_>, language: Language) -> bool {
207    match language {
208        Language::JavaScript | Language::TypeScript => {
209            matches!(
210                node.kind(),
211                "function_declaration" | "method_definition" | "generator_function_declaration"
212            ) || (node.kind() == "variable_declarator"
213                && node
214                    .child_by_field_name("value")
215                    .is_some_and(|value| is_javascript_function_value(value.kind())))
216        }
217        _ => is_function_kind(node.kind(), language),
218    }
219}
220
221fn function_name<'a>(node: &Node<'_>, source: &'a str) -> Option<&'a str> {
222    if let Some(name) = node.child_by_field_name("name") {
223        return Some(&source[name.byte_range()]);
224    }
225    if let Some(declarator) = node.child_by_field_name("declarator") {
226        if let Some(name) = c_function_name(declarator, source) {
227            return Some(name);
228        }
229        if let Some(name) = first_identifier_in_subtree(declarator, source) {
230            return Some(name);
231        }
232    }
233
234    let mut cursor = node.walk();
235    for child in node.children(&mut cursor) {
236        if matches!(
237            child.kind(),
238            "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
239        ) {
240            return Some(&source[child.byte_range()]);
241        }
242    }
243    None
244}
245
246fn c_function_name<'a>(function_declarator: Node<'_>, source: &'a str) -> Option<&'a str> {
247    let mut current = function_declarator.child_by_field_name("declarator")?;
248    for _ in 0..32 {
249        match current.kind() {
250            "identifier"
251            | "field_identifier"
252            | "type_identifier"
253            | "property_identifier"
254            | "operator_name"
255            | "destructor_name" => return Some(&source[current.byte_range()]),
256            "qualified_identifier" | "template_function" => {
257                current = current.child_by_field_name("name")?;
258            }
259            "pointer_declarator"
260            | "reference_declarator"
261            | "function_declarator"
262            | "parenthesized_declarator" => {
263                current = current.child_by_field_name("declarator")?;
264            }
265            _ => return None,
266        }
267    }
268    None
269}
270
271fn first_identifier_in_subtree<'a>(node: Node<'_>, source: &'a str) -> Option<&'a str> {
272    let mut stack = vec![node];
273    while let Some(current) = stack.pop() {
274        if matches!(
275            current.kind(),
276            "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
277        ) {
278            return Some(&source[current.byte_range()]);
279        }
280        push_children_reverse(current, &mut stack);
281    }
282    None
283}
284
285fn function_signature(node: &Node<'_>, source: &str) -> String {
286    if node.kind() == "variable_declarator" {
287        return variable_function_signature(node, source);
288    }
289
290    let mut signature_parts = Vec::new();
291    let mut cursor = node.walk();
292    for child in node.children(&mut cursor) {
293        let kind = child.kind();
294        if matches!(
295            kind,
296            "identifier"
297                | "field_identifier"
298                | "type_identifier"
299                | "property_identifier"
300                | "parameters"
301                | "formal_parameters"
302                | "parameter_list"
303                | "function_declarator"
304                | "type_parameters"
305                | "type_arguments"
306                | "return_type"
307                | "type_annotation"
308                | "result"
309        ) {
310            signature_parts.push(&source[child.byte_range()]);
311        }
312        if matches!(
313            kind,
314            "block" | "compound_statement" | "statement_block" | "suite"
315        ) {
316            break;
317        }
318    }
319
320    signature_parts.join(" ")
321}
322
323fn variable_function_signature(node: &Node<'_>, source: &str) -> String {
324    let Some(name) = node.child_by_field_name("name") else {
325        return String::new();
326    };
327    let Some(value) = node.child_by_field_name("value") else {
328        return source[name.byte_range()].to_string();
329    };
330
331    let mut signature_parts = vec![&source[name.byte_range()]];
332    let mut cursor = value.walk();
333    for child in value.children(&mut cursor) {
334        if matches!(child.kind(), "formal_parameters" | "parameters") {
335            signature_parts.push(&source[child.byte_range()]);
336        }
337        if matches!(child.kind(), "statement_block" | "body") {
338            break;
339        }
340    }
341    signature_parts.join(" ")
342}
343
344fn line_offsets(source: &str) -> Vec<usize> {
345    let mut offsets =
346        Vec::with_capacity(source.as_bytes().iter().filter(|&&b| b == b'\n').count() + 1);
347    offsets.push(0);
348    for (index, byte) in source.bytes().enumerate() {
349        if byte == b'\n' && index + 1 < source.len() {
350            offsets.push(index + 1);
351        }
352    }
353    offsets
354}
355
356fn is_javascript_function_value(kind: &str) -> bool {
357    matches!(
358        kind,
359        "arrow_function" | "function_expression" | "generator_function"
360    )
361}
362
363fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
364    let child_count = node.child_count();
365    for index in (0..child_count).rev() {
366        if let Some(child) = node.child(index as u32) {
367            stack.push(child);
368        }
369    }
370}