1use std::path::Path;
2
3use tree_sitter::Node;
4
5use crate::language::Language;
6
7#[derive(Debug, Clone)]
8pub struct ParsedSymbol {
9 pub name: String,
10 pub qualified_name: String,
11 pub kind: String,
12 pub start_byte: usize,
13 pub end_byte: usize,
14 pub start_line: usize,
15 pub end_line: usize,
16 pub signature: Option<String>,
17 pub docs: Option<String>,
18 pub facts: Vec<ParsedSymbolFact>,
19}
20
/// A `kind`/`value` pair describing an extra property of a [`ParsedSymbol`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedSymbolFact {
    /// Namespace of the fact, e.g. `"rust_attr"`.
    pub kind: String,
    /// The fact itself within that namespace, e.g. `"uniffi_export"`.
    pub value: String,
}
26
/// Syntax-node kinds that are accepted as the name of a definition when a
/// grammar does not expose an explicit `name` field.
const NAME_KINDS: &[&str] = &[
    "identifier",
    "type_identifier",
    "property_identifier",
    "field_identifier",
    "simple_identifier",
    "namespace_identifier",
];
35
/// The concrete parser selected for a file.
///
/// This is finer-grained than the source language: TypeScript files are split
/// into plain TypeScript and TSX.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserKind {
    /// Rust sources.
    Rust,
    /// TypeScript sources that are not `.tsx` files.
    TypeScript,
    /// TypeScript sources with the `.tsx` extension.
    Tsx,
    /// Kotlin sources.
    Kotlin,
    /// C sources.
    C,
    /// C++ sources.
    Cpp,
    /// Markdown documents.
    Markdown,
}
46
47pub fn parser_kind(path: &Path, language: Language) -> ParserKind {
48 match language {
49 Language::Rust => ParserKind::Rust,
50 Language::TypeScript => {
51 if path.extension().and_then(|ext| ext.to_str()) == Some("tsx") {
52 ParserKind::Tsx
53 } else {
54 ParserKind::TypeScript
55 }
56 },
57 Language::Kotlin => ParserKind::Kotlin,
58 Language::C => ParserKind::C,
59 Language::Cpp => ParserKind::Cpp,
60 Language::Markdown => ParserKind::Markdown,
61 }
62}
63
64pub fn parse_symbols(
65 path: &Path,
66 language: Language,
67 text: &str,
68) -> anyhow::Result<Vec<ParsedSymbol>> {
69 match parser_kind(path, language) {
70 ParserKind::Rust => {
71 parse_tree_sitter(path, language, text, tree_sitter_rust::LANGUAGE.into())
72 },
73 ParserKind::TypeScript => parse_tree_sitter(
74 path,
75 language,
76 text,
77 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
78 ),
79 ParserKind::Tsx => {
80 parse_tree_sitter(path, language, text, tree_sitter_typescript::LANGUAGE_TSX.into())
81 },
82 ParserKind::Kotlin => {
83 parse_tree_sitter(path, language, text, tree_sitter_kotlin::LANGUAGE.into())
84 },
85 ParserKind::C => parse_tree_sitter(path, language, text, tree_sitter_c::LANGUAGE.into()),
86 ParserKind::Cpp => {
87 parse_tree_sitter(path, language, text, tree_sitter_cpp::LANGUAGE.into())
88 },
89 ParserKind::Markdown => Ok(Vec::new()),
90 }
91}
92
93pub fn parse_error(path: &Path, language: Language, text: &str) -> anyhow::Result<Option<String>> {
94 let grammar = match parser_kind(path, language) {
95 ParserKind::Rust => tree_sitter_rust::LANGUAGE.into(),
96 ParserKind::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
97 ParserKind::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
98 ParserKind::Kotlin => tree_sitter_kotlin::LANGUAGE.into(),
99 ParserKind::C => tree_sitter_c::LANGUAGE.into(),
100 ParserKind::Cpp => tree_sitter_cpp::LANGUAGE.into(),
101 ParserKind::Markdown => return Ok(None),
102 };
103 let mut parser = tree_sitter::Parser::new();
104 parser.set_language(&grammar)?;
105 let tree =
106 parser.parse(text, None).ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
107 Ok(tree.root_node().has_error().then(|| {
108 "tree-sitter parse produced error nodes; partial structural index was retained".to_string()
109 }))
110}
111
112fn parse_tree_sitter(
113 path: &Path,
114 language: Language,
115 text: &str,
116 grammar: tree_sitter::Language,
117) -> anyhow::Result<Vec<ParsedSymbol>> {
118 let mut parser = tree_sitter::Parser::new();
119 parser.set_language(&grammar)?;
120 let tree =
121 parser.parse(text, None).ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
122 let mut out = Vec::new();
123 collect_symbols(path, language, text, tree.root_node(), &mut out);
124 out.sort_by_key(|symbol| (symbol.start_byte, symbol.end_byte));
125 out.dedup_by_key(|symbol| (symbol.start_byte, symbol.end_byte, symbol.name.clone()));
126 Ok(out)
127}
128
129fn collect_symbols(
130 path: &Path,
131 language: Language,
132 text: &str,
133 node: Node<'_>,
134 out: &mut Vec<ParsedSymbol>,
135) {
136 if node.is_error() || node.is_missing() {
137 return;
138 }
139 if let Some((kind, name_node)) = symbol_node(language, node) {
140 let name = node_text(name_node, text).unwrap_or_default();
141 if !name.is_empty() {
142 out.push(make_symbol(path, language, text, node, kind, name));
143 }
144 }
145 let mut cursor = node.walk();
146 for child in node.named_children(&mut cursor) {
147 collect_symbols(path, language, text, child, out);
148 }
149}
150
151fn symbol_node(language: Language, node: Node<'_>) -> Option<(&'static str, Node<'_>)> {
152 let kind = node.kind();
153 match language {
154 Language::Rust => match kind {
155 "function_item" => Some(("function", child_name(node)?)),
156 "struct_item" => Some(("struct", child_name(node)?)),
157 "enum_item" => Some(("enum", child_name(node)?)),
158 "trait_item" => Some(("trait", child_name(node)?)),
159 "impl_item" => Some(("impl", impl_name(node).unwrap_or(node))),
160 "mod_item" => Some(("module", child_name(node)?)),
161 "const_item" => Some(("const", child_name(node)?)),
162 "static_item" => Some(("static", child_name(node)?)),
163 "type_item" => Some(("type", child_name(node)?)),
164 "macro_definition" => Some(("macro", child_name(node)?)),
165 _ => None,
166 },
167 Language::TypeScript => match kind {
168 "function_declaration" | "method_definition" | "generator_function_declaration" => {
169 Some(("function", child_name(node)?))
170 },
171 "class_declaration" => Some(("class", child_name(node)?)),
172 "interface_declaration" => Some(("interface", child_name(node)?)),
173 "type_alias_declaration" => Some(("type", child_name(node)?)),
174 "variable_declarator" | "public_field_definition" => Some(("const", child_name(node)?)),
175 _ => None,
176 },
177 Language::Kotlin => match kind {
178 "class_declaration" => Some(("class", child_name(node)?)),
179 "object_declaration" => Some(("object", child_name(node)?)),
180 "function_declaration" => Some(("function", child_name(node)?)),
181 "property_declaration" => Some(("property", kotlin_property_name(node)?)),
182 "companion_object" | "companion_object_declaration" => {
183 Some(("object", companion_name(node).unwrap_or(node)))
184 },
185 _ => None,
186 },
187 Language::C => match kind {
188 "function_definition" => {
189 Some(("function", function_name(node).or_else(|| child_name(node))?))
190 },
191 "declaration" if has_descendant_kind(node, "function_declarator") => {
192 Some(("function", function_name(node).or_else(|| child_name(node))?))
193 },
194 "struct_specifier" => Some(("struct", child_name(node)?)),
195 "union_specifier" => Some(("union", child_name(node)?)),
196 "enum_specifier" => Some(("enum", child_name(node)?)),
197 "type_definition" => Some(("type", child_name(node)?)),
198 "preproc_function_def" => Some(("macro", child_name(node)?)),
199 _ => None,
200 },
201 Language::Cpp => match kind {
202 "function_definition" => {
203 Some(("function", function_name(node).or_else(|| child_name(node))?))
204 },
205 "declaration" if has_descendant_kind(node, "function_declarator") => {
206 Some(("function", function_name(node).or_else(|| child_name(node))?))
207 },
208 "class_specifier" => Some(("class", child_name(node)?)),
209 "struct_specifier" => Some(("struct", child_name(node)?)),
210 "union_specifier" => Some(("union", child_name(node)?)),
211 "enum_specifier" => Some(("enum", child_name(node)?)),
212 "type_definition" | "alias_declaration" => Some(("type", child_name(node)?)),
213 "namespace_definition" => Some(("namespace", child_name(node)?)),
214 "preproc_function_def" => Some(("macro", child_name(node)?)),
215 _ => None,
216 },
217 Language::Markdown => None,
218 }
219}
220
221fn child_name(node: Node<'_>) -> Option<Node<'_>> {
222 if let Some(name) = node.child_by_field_name("name") {
223 return Some(name);
224 }
225
226 let mut cursor = node.walk();
227 if let Some(name) =
228 node.named_children(&mut cursor).find(|child| NAME_KINDS.contains(&child.kind()))
229 {
230 return Some(name);
231 }
232
233 let mut cursor = node.walk();
234 node.named_children(&mut cursor).find_map(|child| first_descendant_node(child, NAME_KINDS))
235}
236
237fn first_descendant_node<'tree>(node: Node<'tree>, kinds: &[&str]) -> Option<Node<'tree>> {
238 let mut cursor = node.walk();
239 for child in node.named_children(&mut cursor) {
240 if kinds.contains(&child.kind()) {
241 return Some(child);
242 }
243 if let Some(value) = first_descendant_node(child, kinds) {
244 return Some(value);
245 }
246 }
247 None
248}
249
250fn has_descendant_kind(node: Node<'_>, kind: &str) -> bool {
251 let mut cursor = node.walk();
252 node.named_children(&mut cursor)
253 .any(|child| child.kind() == kind || has_descendant_kind(child, kind))
254}
255
256fn companion_name(node: Node<'_>) -> Option<Node<'_>> {
257 for index in 0..node.child_count() {
258 let Some(index) = u32::try_from(index).ok() else {
259 continue;
260 };
261 if let Some(child) = node.child(index)
262 && child.kind() == "companion"
263 {
264 return Some(child);
265 }
266 }
267 let mut cursor = node.walk();
268 node.named_children(&mut cursor)
269 .find(|child| matches!(child.kind(), "simple_identifier" | "type_identifier"))
270}
271
272fn kotlin_property_name(node: Node<'_>) -> Option<Node<'_>> {
273 child_name(kotlin_variable_declaration(node).unwrap_or(node))
274}
275
276fn kotlin_variable_declaration(node: Node<'_>) -> Option<Node<'_>> {
277 let mut cursor = node.walk();
278 node.named_children(&mut cursor).find_map(|child| {
279 if child.kind() == "variable_declaration" {
280 Some(child)
281 } else if matches!(child.kind(), "modifiers" | "type_parameters" | "type_constraints") {
282 None
283 } else {
284 kotlin_variable_declaration(child)
285 }
286 })
287}
288
289fn function_name(node: Node<'_>) -> Option<Node<'_>> {
290 let declarator = first_descendant_node(node, &["function_declarator"]).unwrap_or(node);
291 let name_root = declarator.child_by_field_name("declarator").unwrap_or(declarator);
292 if NAME_KINDS.contains(&name_root.kind()) {
293 return Some(name_root);
294 }
295 last_descendant_node(name_root, NAME_KINDS)
296}
297
298fn last_descendant_node<'tree>(node: Node<'tree>, kinds: &[&str]) -> Option<Node<'tree>> {
299 let mut cursor = node.walk();
300 let mut last = None;
301 for child in node.named_children(&mut cursor) {
302 if kinds.contains(&child.kind()) {
303 last = Some(child);
304 }
305 if let Some(value) = last_descendant_node(child, kinds) {
306 last = Some(value);
307 }
308 }
309 last
310}
311
312fn impl_name(node: Node<'_>) -> Option<Node<'_>> {
313 let mut cursor = node.walk();
314 node.named_children(&mut cursor).find(|child| {
315 matches!(child.kind(), "type_identifier" | "generic_type" | "scoped_type_identifier")
316 })
317}
318
319fn make_symbol(
320 path: &Path,
321 language: Language,
322 text: &str,
323 node: Node<'_>,
324 kind: &str,
325 name: String,
326) -> ParsedSymbol {
327 let start_byte = node.start_byte();
328 let end_byte = node.end_byte();
329 let start_line = byte_to_line(text, start_byte);
330 let end_line = byte_to_line(text, end_byte);
331 ParsedSymbol {
332 qualified_name: format!("{}::{name}", path.to_string_lossy().replace('\\', "/")),
333 name,
334 kind: kind.to_string(),
335 start_byte,
336 end_byte,
337 start_line,
338 end_line,
339 signature: signature_for(text, start_byte, end_byte),
340 docs: docs_before(text, start_byte),
341 facts: symbol_facts(language, text, node),
342 }
343}
344
345fn symbol_facts(language: Language, text: &str, node: Node<'_>) -> Vec<ParsedSymbolFact> {
346 if language != Language::Rust {
347 return Vec::new();
348 }
349 let mut facts = Vec::new();
350 for attribute in rust_attribute_items(text, node) {
351 if rust_attribute_is_uniffi_export(&attribute) {
352 facts.push(ParsedSymbolFact {
353 kind: "rust_attr".to_string(),
354 value: "uniffi_export".to_string(),
355 });
356 }
357 }
358 facts.sort_by(|left, right| (&left.kind, &left.value).cmp(&(&right.kind, &right.value)));
359 facts.dedup();
360 facts
361}
362
363fn rust_attribute_items(text: &str, node: Node<'_>) -> Vec<String> {
364 let mut attributes = Vec::new();
365 let mut cursor = node.walk();
366 for child in node.named_children(&mut cursor) {
367 if child.kind() == "attribute_item" {
368 attributes.push(node_text(child, text).unwrap_or_default());
369 }
370 }
371
372 let mut preceding = Vec::new();
373 let mut sibling = node.prev_named_sibling();
374 while let Some(previous) = sibling {
375 if previous.kind() != "attribute_item" {
376 break;
377 }
378 preceding.push(node_text(previous, text).unwrap_or_default());
379 sibling = previous.prev_named_sibling();
380 }
381 preceding.reverse();
382 preceding.extend(attributes);
383 preceding
384}
385
/// Tells whether the attribute source text mentions the `uniffi::export`
/// path.
///
/// This is a plain substring test, so the absolute form `::uniffi::export`
/// and attributes with arguments are matched as well.
fn rust_attribute_is_uniffi_export(attribute: &str) -> bool {
    // `::uniffi::export` necessarily contains `uniffi::export`, so one probe
    // covers both spellings.
    attribute.contains("uniffi::export")
}
389
390fn node_text(node: Node<'_>, text: &str) -> Option<String> {
391 node.utf8_text(text.as_bytes()).ok().map(ToOwned::to_owned)
392}
393
/// Converts a byte offset into a 1-based line number.
///
/// Offsets past the end of `text` are clamped to its length. Newlines are
/// counted on the raw bytes, so an offset that falls inside a multi-byte
/// character is handled instead of panicking on a `str` slice boundary.
fn byte_to_line(text: &str, byte: usize) -> usize {
    let prefix = &text.as_bytes()[..byte.min(text.len())];
    prefix.iter().filter(|&&value| value == b'\n').count() + 1
}
397
/// Returns the first non-blank line of `text[start_byte..end_byte]`, trimmed.
///
/// Yields `None` when the range is invalid for `text` (out of bounds,
/// reversed, or not on character boundaries) or contains only blank lines.
fn signature_for(text: &str, start_byte: usize, end_byte: usize) -> Option<String> {
    let span = text.get(start_byte..end_byte)?;
    span.lines().map(str::trim).find(|line| !line.is_empty()).map(str::to_owned)
}
404
/// Collects the doc comment that sits directly above byte offset
/// `start_byte`.
///
/// Lines are scanned upwards from the offset. Blank lines are skipped, lines
/// that look like doc comments (`///`, or the `/**`, `*`, `*/` lines of a
/// block comment) are gathered, and the first other line ends the scan.
/// Marker-only lines such as a bare `///` or ` *` belong to the comment but
/// contribute no text. The gathered lines are returned in source order,
/// joined by `\n`.
///
/// Returns `None` when `start_byte` is not a valid slice end for `text` or
/// when no doc text is found.
fn docs_before(text: &str, start_byte: usize) -> Option<String> {
    let before = text.get(..start_byte)?;
    let mut docs = Vec::new();
    for line in before.lines().rev() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        if !is_doc_comment_line(trimmed) {
            break;
        }
        // An empty doc line (paragraph break, opening or closing delimiter)
        // must not stop the scan, otherwise earlier paragraphs are lost.
        if let Some(doc_line) = clean_doc_comment_line(trimmed) {
            docs.push(doc_line);
        }
    }
    docs.reverse();
    (!docs.is_empty()).then(|| docs.join("\n"))
}

/// Tells whether an already-trimmed line starts with a doc-comment marker.
fn is_doc_comment_line(trimmed: &str) -> bool {
    trimmed.starts_with("///") || trimmed.starts_with('*') || trimmed.starts_with("/**")
}

/// Strips doc-comment decoration from an already-trimmed line.
///
/// Returns `None` when the line is not a doc-comment line or when nothing but
/// decoration is left.
fn clean_doc_comment_line(trimmed: &str) -> Option<String> {
    if !is_doc_comment_line(trimmed) {
        return None;
    }

    let line = if trimmed.starts_with("///") {
        trimmed.trim_start_matches('/')
    } else {
        // Remove the block terminator as a unit (plus any extra `*` of a
        // `**/` closer) before the leading decoration. Trimming bare `/`
        // characters from the end would leave the terminator's `*` behind and
        // would also eat legitimate trailing slashes such as those of a URL.
        let unterminated = match trimmed.strip_suffix("*/") {
            Some(inner) => inner.trim_end_matches('*'),
            None => trimmed,
        };
        unterminated.trim_start_matches('/').trim_start_matches('*')
    }
    .trim();

    (!line.is_empty()).then(|| line.to_string())
}