1use std::ops::Range;
5
6use tree_sitter::Node;
7
8use super::{
9 parser_language::Language,
10 parser_types::{FunctionDef, Import, ImportKind},
11};
12
13#[derive(Debug)]
15pub struct SyntaxIndex {
16 functions: Vec<IndexedFunction>,
17 imports: Vec<IndexedImport>,
18 line_offsets: Vec<usize>,
19}
20
21#[derive(Clone, Copy, Debug)]
23pub struct FunctionRef<'a> {
24 inner: &'a IndexedFunction,
25 source: &'a str,
26}
27
28#[derive(Clone, Copy, Debug)]
30pub struct ImportRef<'a> {
31 inner: &'a IndexedImport,
32 source: &'a str,
33}
34
35#[derive(Debug)]
36struct IndexedFunction {
37 name: String,
38 signature: String,
39 start_line: usize,
40 end_line: usize,
41 content: Range<usize>,
42}
43
44#[derive(Debug)]
45struct IndexedImport {
46 raw: Range<usize>,
47 kind: ImportKind,
48}
49
50impl SyntaxIndex {
51 pub(super) fn build(language: Language, source: &str, root: Node<'_>) -> Self {
52 let mut index = Self {
53 functions: Vec::new(),
54 imports: Vec::new(),
55 line_offsets: line_offsets(source),
56 };
57
58 let mut stack = vec![root];
59 while let Some(node) = stack.pop() {
60 if is_function_node(&node, language)
61 && let Some(name) = function_name(&node, source)
62 {
63 index.functions.push(IndexedFunction {
64 name: name.to_string(),
65 signature: function_signature(&node, source),
66 start_line: node.start_position().row,
67 end_line: node.end_position().row,
68 content: node.byte_range(),
69 });
70 }
71
72 push_children_reverse(node, &mut stack);
73 }
74
75 let mut cursor = root.walk();
76 for child in root.children(&mut cursor) {
77 match language {
78 Language::Rust => match child.kind() {
79 "use_declaration" => index.imports.push(IndexedImport {
80 raw: child.byte_range(),
81 kind: ImportKind::Use,
82 }),
83 "extern_crate_declaration" => index.imports.push(IndexedImport {
84 raw: child.byte_range(),
85 kind: ImportKind::ExternCrate,
86 }),
87 _ => {}
88 },
89 Language::Python => {
90 if matches!(child.kind(), "import_statement" | "import_from_statement") {
91 index.imports.push(IndexedImport {
92 raw: child.byte_range(),
93 kind: ImportKind::Import,
94 });
95 }
96 }
97 Language::JavaScript | Language::TypeScript => {
98 if child.kind() == "import_statement" {
99 index.imports.push(IndexedImport {
100 raw: child.byte_range(),
101 kind: ImportKind::Import,
102 });
103 }
104 }
105 Language::Go | Language::Java => {
106 if child.kind() == "import_declaration" {
107 index.imports.push(IndexedImport {
108 raw: child.byte_range(),
109 kind: ImportKind::Import,
110 });
111 }
112 }
113 Language::C | Language::Cpp | Language::Unknown => {}
114 }
115 }
116
117 index
118 }
119
120 pub fn functions<'a>(&'a self, source: &'a str) -> impl Iterator<Item = FunctionRef<'a>> + 'a {
121 self.functions
122 .iter()
123 .map(move |inner| FunctionRef { inner, source })
124 }
125
126 pub fn imports<'a>(&'a self, source: &'a str) -> impl Iterator<Item = ImportRef<'a>> + 'a {
127 self.imports
128 .iter()
129 .map(move |inner| ImportRef { inner, source })
130 }
131
132 pub fn line_offsets(&self) -> &[usize] {
134 &self.line_offsets
135 }
136}
137
138impl FunctionRef<'_> {
139 pub fn name(&self) -> &str {
140 &self.inner.name
141 }
142
143 pub fn signature(&self) -> &str {
144 &self.inner.signature
145 }
146
147 pub fn start_line(&self) -> usize {
148 self.inner.start_line
149 }
150
151 pub fn end_line(&self) -> usize {
152 self.inner.end_line
153 }
154
155 pub fn content(&self) -> &str {
156 &self.source[self.inner.content.clone()]
157 }
158
159 pub fn to_owned(self) -> FunctionDef {
160 FunctionDef {
161 name: self.name().to_string(),
162 signature: self.signature().to_string(),
163 start_line: self.start_line(),
164 end_line: self.end_line(),
165 content: self.content().to_string(),
166 }
167 }
168}
169
170impl ImportRef<'_> {
171 pub fn raw(&self) -> &str {
172 &self.source[self.inner.raw.clone()]
173 }
174
175 pub fn kind(&self) -> ImportKind {
176 self.inner.kind
177 }
178
179 pub fn to_owned(self) -> Import {
180 Import {
181 raw: self.raw().to_string(),
182 kind: self.kind(),
183 }
184 }
185}
186
187pub(super) fn is_function_kind(kind: &str, language: Language) -> bool {
188 match language {
189 Language::Rust => {
190 kind == "function_item" || kind == "method_declaration" || kind == "closure_expression"
191 }
192 Language::Python => kind == "function_definition",
193 Language::JavaScript | Language::TypeScript => {
194 kind == "function_declaration"
195 || kind == "method_definition"
196 || kind == "generator_function_declaration"
197 || kind == "variable_declarator"
198 }
199 Language::Go => kind == "function_declaration" || kind == "method_declaration",
200 Language::C | Language::Cpp => kind == "function_definition",
201 Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
202 Language::Unknown => false,
203 }
204}
205
206fn is_function_node(node: &Node<'_>, language: Language) -> bool {
207 match language {
208 Language::JavaScript | Language::TypeScript => {
209 matches!(
210 node.kind(),
211 "function_declaration" | "method_definition" | "generator_function_declaration"
212 ) || (node.kind() == "variable_declarator"
213 && node
214 .child_by_field_name("value")
215 .is_some_and(|value| is_javascript_function_value(value.kind())))
216 }
217 _ => is_function_kind(node.kind(), language),
218 }
219}
220
221fn function_name<'a>(node: &Node<'_>, source: &'a str) -> Option<&'a str> {
222 if let Some(name) = node.child_by_field_name("name") {
223 return Some(&source[name.byte_range()]);
224 }
225 if let Some(declarator) = node.child_by_field_name("declarator") {
226 if let Some(name) = c_function_name(declarator, source) {
227 return Some(name);
228 }
229 if let Some(name) = first_identifier_in_subtree(declarator, source) {
230 return Some(name);
231 }
232 }
233
234 let mut cursor = node.walk();
235 for child in node.children(&mut cursor) {
236 if matches!(
237 child.kind(),
238 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
239 ) {
240 return Some(&source[child.byte_range()]);
241 }
242 }
243 None
244}
245
246fn c_function_name<'a>(function_declarator: Node<'_>, source: &'a str) -> Option<&'a str> {
247 let mut current = function_declarator.child_by_field_name("declarator")?;
248 for _ in 0..32 {
249 match current.kind() {
250 "identifier"
251 | "field_identifier"
252 | "type_identifier"
253 | "property_identifier"
254 | "operator_name"
255 | "destructor_name" => return Some(&source[current.byte_range()]),
256 "qualified_identifier" | "template_function" => {
257 current = current.child_by_field_name("name")?;
258 }
259 "pointer_declarator"
260 | "reference_declarator"
261 | "function_declarator"
262 | "parenthesized_declarator" => {
263 current = current.child_by_field_name("declarator")?;
264 }
265 _ => return None,
266 }
267 }
268 None
269}
270
271fn first_identifier_in_subtree<'a>(node: Node<'_>, source: &'a str) -> Option<&'a str> {
272 let mut stack = vec![node];
273 while let Some(current) = stack.pop() {
274 if matches!(
275 current.kind(),
276 "identifier" | "field_identifier" | "type_identifier" | "property_identifier"
277 ) {
278 return Some(&source[current.byte_range()]);
279 }
280 push_children_reverse(current, &mut stack);
281 }
282 None
283}
284
285fn function_signature(node: &Node<'_>, source: &str) -> String {
286 if node.kind() == "variable_declarator" {
287 return variable_function_signature(node, source);
288 }
289
290 let mut signature_parts = Vec::new();
291 let mut cursor = node.walk();
292 for child in node.children(&mut cursor) {
293 let kind = child.kind();
294 if matches!(
295 kind,
296 "identifier"
297 | "field_identifier"
298 | "type_identifier"
299 | "property_identifier"
300 | "parameters"
301 | "formal_parameters"
302 | "parameter_list"
303 | "function_declarator"
304 | "type_parameters"
305 | "type_arguments"
306 | "return_type"
307 | "type_annotation"
308 | "result"
309 ) {
310 signature_parts.push(&source[child.byte_range()]);
311 }
312 if matches!(
313 kind,
314 "block" | "compound_statement" | "statement_block" | "suite"
315 ) {
316 break;
317 }
318 }
319
320 signature_parts.join(" ")
321}
322
323fn variable_function_signature(node: &Node<'_>, source: &str) -> String {
324 let Some(name) = node.child_by_field_name("name") else {
325 return String::new();
326 };
327 let Some(value) = node.child_by_field_name("value") else {
328 return source[name.byte_range()].to_string();
329 };
330
331 let mut signature_parts = vec![&source[name.byte_range()]];
332 let mut cursor = value.walk();
333 for child in value.children(&mut cursor) {
334 if matches!(child.kind(), "formal_parameters" | "parameters") {
335 signature_parts.push(&source[child.byte_range()]);
336 }
337 if matches!(child.kind(), "statement_block" | "body") {
338 break;
339 }
340 }
341 signature_parts.join(" ")
342}
343
344fn line_offsets(source: &str) -> Vec<usize> {
345 let mut offsets =
346 Vec::with_capacity(source.as_bytes().iter().filter(|&&b| b == b'\n').count() + 1);
347 offsets.push(0);
348 for (index, byte) in source.bytes().enumerate() {
349 if byte == b'\n' && index + 1 < source.len() {
350 offsets.push(index + 1);
351 }
352 }
353 offsets
354}
355
356fn is_javascript_function_value(kind: &str) -> bool {
357 matches!(
358 kind,
359 "arrow_function" | "function_expression" | "generator_function"
360 )
361}
362
363fn push_children_reverse<'tree>(node: Node<'tree>, stack: &mut Vec<Node<'tree>>) {
364 let child_count = node.child_count();
365 for index in (0..child_count).rev() {
366 if let Some(child) = node.child(index as u32) {
367 stack.push(child);
368 }
369 }
370}