infiniloom_engine/parser/
extraction.rs1use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
/// Clamps `index` to a UTF-8 character boundary of `s`.
///
/// Indices at or beyond the end of `s` map to `s.len()`. Any other index is moved
/// backwards to the closest boundary that does not exceed it, so the result can
/// always be used to slice `s` without panicking.
fn safe_char_boundary(s: &str, mut index: usize) -> usize {
    index = index.min(s.len());
    // Offset 0 is always a boundary, so the search below cannot come up empty.
    (0..=index)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0)
}
27
28pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
30 let sig_node = match language {
31 Language::Python => {
32 if node.kind() == "function_definition" {
33 let start = node.start_byte();
34 let mut end = start;
35 for byte in &source_code.as_bytes()[start..] {
36 end += 1;
37 if *byte == b':' || *byte == b'\n' {
38 break;
39 }
40 }
41 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
42 }
43 None
44 },
45 Language::JavaScript | Language::TypeScript => {
46 if node.kind().contains("function") || node.kind().contains("method") {
47 let start = node.start_byte();
48 let mut end = start;
49 let mut brace_count = 0;
50 for byte in &source_code.as_bytes()[start..] {
51 if *byte == b'{' {
52 brace_count += 1;
53 if brace_count == 1 {
54 break;
55 }
56 }
57 end += 1;
58 }
59 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
60 }
61 None
62 },
63 Language::Rust => {
64 if node.kind() == "function_item" {
65 for child in node.children(&mut node.walk()) {
66 if child.kind() == "block" {
67 let start = node.start_byte();
68 let end = child.start_byte();
69 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
70 }
71 }
72 }
73 None
74 },
75 Language::Go => {
76 if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
77 for child in node.children(&mut node.walk()) {
78 if child.kind() == "block" {
79 let start = node.start_byte();
80 let end = child.start_byte();
81 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
82 }
83 }
84 }
85 None
86 },
87 Language::Java => {
88 if node.kind() == "method_declaration" {
89 for child in node.children(&mut node.walk()) {
90 if child.kind() == "block" {
91 let start = node.start_byte();
92 let end = child.start_byte();
93 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
94 }
95 }
96 }
97 None
98 },
99 Language::C
100 | Language::Cpp
101 | Language::CSharp
102 | Language::Php
103 | Language::Kotlin
104 | Language::Swift
105 | Language::Scala => {
106 for child in node.children(&mut node.walk()) {
107 if child.kind() == "block"
108 || child.kind() == "compound_statement"
109 || child.kind() == "function_body"
110 {
111 let start = node.start_byte();
112 let end = child.start_byte();
113 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
114 }
115 }
116 None
117 },
118 Language::Ruby | Language::Lua => {
119 let start = node.start_byte();
120 let mut end = start;
121 for byte in &source_code.as_bytes()[start..] {
122 end += 1;
123 if *byte == b'\n' {
124 break;
125 }
126 }
127 Some(source_code[start..end].trim().to_owned())
128 },
129 Language::Bash => {
130 let start = node.start_byte();
131 let mut end = start;
132 for byte in &source_code.as_bytes()[start..] {
133 if *byte == b'{' {
134 break;
135 }
136 end += 1;
137 }
138 Some(source_code[start..end].trim().to_owned())
139 },
140 Language::Haskell
141 | Language::OCaml
142 | Language::FSharp
143 | Language::Elixir
144 | Language::Clojure
145 | Language::R => {
146 let start = node.start_byte();
147 let mut end = start;
148 for byte in &source_code.as_bytes()[start..] {
149 end += 1;
150 if *byte == b'\n' || *byte == b'=' {
151 break;
152 }
153 }
154 Some(source_code[start..end].trim().to_owned())
155 },
156 };
157
158 sig_node.or_else(|| {
159 let start = node.start_byte();
160 let end = std::cmp::min(start + 200, source_code.len());
161 let safe_start = safe_char_boundary(source_code, start);
163 let safe_end = safe_char_boundary(source_code, end);
164 if safe_start >= safe_end {
165 return None;
166 }
167 let text = &source_code[safe_start..safe_end];
168 text.lines().next().map(|s| s.trim().to_owned())
169 })
170}
171
172pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
174 match language {
175 Language::Python => {
176 let mut cursor = node.walk();
177 for child in node.children(&mut cursor) {
178 if child.kind() == "block" {
179 for stmt in child.children(&mut child.walk()) {
180 if stmt.kind() == "expression_statement" {
181 for expr in stmt.children(&mut stmt.walk()) {
182 if expr.kind() == "string" {
183 if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
184 return Some(
185 text.trim_matches(|c| c == '"' || c == '\'')
186 .trim()
187 .to_owned(),
188 );
189 }
190 }
191 }
192 }
193 }
194 }
195 }
196 None
197 },
198 Language::JavaScript | Language::TypeScript => {
199 if let Some(prev_sibling) = node.prev_sibling() {
200 if prev_sibling.kind() == "comment" {
201 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
202 if text.starts_with("/**") {
203 return Some(clean_jsdoc(text));
204 }
205 }
206 }
207 }
208 None
209 },
210 Language::Rust => {
211 let start_byte = node.start_byte();
212 let lines_before: Vec<_> = source_code[..start_byte]
213 .lines()
214 .rev()
215 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
216 .collect();
217
218 if !lines_before.is_empty() {
219 let doc: Vec<String> = lines_before
220 .into_iter()
221 .rev()
222 .filter_map(|line| {
223 let trimmed = line.trim();
224 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
225 })
226 .collect();
227
228 if !doc.is_empty() {
229 return Some(doc.join(" "));
230 }
231 }
232 None
233 },
234 Language::Go => {
235 if let Some(prev_sibling) = node.prev_sibling() {
236 if prev_sibling.kind() == "comment" {
237 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
238 return Some(text.trim_start_matches("//").trim().to_owned());
239 }
240 }
241 }
242 None
243 },
244 Language::Java => {
245 if let Some(prev_sibling) = node.prev_sibling() {
246 if prev_sibling.kind() == "block_comment" {
247 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
248 if text.starts_with("/**") {
249 return Some(clean_javadoc(text));
250 }
251 }
252 }
253 }
254 None
255 },
256 Language::C | Language::Cpp => {
257 if let Some(prev_sibling) = node.prev_sibling() {
258 if prev_sibling.kind() == "comment" {
259 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
260 if text.starts_with("/**") || text.starts_with("/*") {
261 return Some(clean_jsdoc(text));
262 }
263 return Some(text.trim_start_matches("//").trim().to_owned());
264 }
265 }
266 }
267 None
268 },
269 Language::CSharp => {
270 let start_byte = node.start_byte();
271 let lines_before: Vec<_> = source_code[..start_byte]
272 .lines()
273 .rev()
274 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
275 .collect();
276
277 if !lines_before.is_empty() {
278 let doc: Vec<String> = lines_before
279 .into_iter()
280 .rev()
281 .filter_map(|line| {
282 let trimmed = line.trim();
283 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
284 })
285 .collect();
286
287 if !doc.is_empty() {
288 return Some(doc.join(" "));
289 }
290 }
291 None
292 },
293 Language::Ruby => {
294 if let Some(prev_sibling) = node.prev_sibling() {
295 if prev_sibling.kind() == "comment" {
296 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
297 return Some(text.trim_start_matches('#').trim().to_owned());
298 }
299 }
300 }
301 None
302 },
303 Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
304 if let Some(prev_sibling) = node.prev_sibling() {
305 let kind = prev_sibling.kind();
306 if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
307 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
308 if text.starts_with("/**") {
309 return Some(clean_jsdoc(text));
310 }
311 }
312 }
313 }
314 None
315 },
316 Language::Bash => {
317 if let Some(prev_sibling) = node.prev_sibling() {
318 if prev_sibling.kind() == "comment" {
319 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
320 return Some(text.trim_start_matches('#').trim().to_owned());
321 }
322 }
323 }
324 None
325 },
326 Language::Haskell => {
327 if let Some(prev_sibling) = node.prev_sibling() {
328 if prev_sibling.kind() == "comment" {
329 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
330 let cleaned = text
331 .trim_start_matches("{-")
332 .trim_end_matches("-}")
333 .trim_start_matches("--")
334 .trim();
335 return Some(cleaned.to_owned());
336 }
337 }
338 }
339 None
340 },
341 Language::Elixir => {
342 if let Some(prev_sibling) = node.prev_sibling() {
343 if prev_sibling.kind() == "comment" {
344 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
345 return Some(text.trim_start_matches('#').trim().to_owned());
346 }
347 }
348 }
349 None
350 },
351 Language::Clojure => None,
352 Language::OCaml | Language::FSharp => {
353 if let Some(prev_sibling) = node.prev_sibling() {
354 if prev_sibling.kind() == "comment" {
355 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
356 let cleaned = text
357 .trim_start_matches("(**")
358 .trim_start_matches("(*")
359 .trim_end_matches("*)")
360 .trim();
361 return Some(cleaned.to_owned());
362 }
363 }
364 }
365 None
366 },
367 Language::Lua => {
368 if let Some(prev_sibling) = node.prev_sibling() {
369 if prev_sibling.kind() == "comment" {
370 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
371 let cleaned = text
372 .trim_start_matches("--[[")
373 .trim_end_matches("]]")
374 .trim_start_matches("--")
375 .trim();
376 return Some(cleaned.to_owned());
377 }
378 }
379 }
380 None
381 },
382 Language::R => {
383 if let Some(prev_sibling) = node.prev_sibling() {
384 if prev_sibling.kind() == "comment" {
385 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
386 return Some(text.trim_start_matches('#').trim().to_owned());
387 }
388 }
389 }
390 None
391 },
392 }
393}
394
395pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
397 let mut current = node.parent()?;
398
399 while let Some(parent) = current.parent() {
400 if ["class_definition", "class_declaration", "struct_item", "impl_item"]
401 .contains(&parent.kind())
402 {
403 for child in parent.children(&mut parent.walk()) {
404 if child.kind() == "identifier" || child.kind() == "type_identifier" {
405 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
406 return Some(name.to_owned());
407 }
408 }
409 }
410 }
411 current = parent;
412 }
413
414 None
415}
416
417pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
419 match language {
420 Language::Python => {
421 if let Some(name_node) = node.child_by_field_name("name") {
422 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
423 if name.starts_with("__") && !name.ends_with("__") {
424 return Visibility::Private;
425 } else if name.starts_with('_') {
426 return Visibility::Protected;
427 }
428 }
429 }
430 Visibility::Public
431 },
432 Language::Rust => {
433 for child in node.children(&mut node.walk()) {
434 if child.kind() == "visibility_modifier" {
435 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
436 if text.contains("pub(crate)") || text.contains("pub(super)") {
437 return Visibility::Internal;
438 } else if text.starts_with("pub") {
439 return Visibility::Public;
440 }
441 }
442 }
443 }
444 Visibility::Private
445 },
446 Language::JavaScript | Language::TypeScript => {
447 for child in node.children(&mut node.walk()) {
448 let kind = child.kind();
449 if kind == "private" || kind == "accessibility_modifier" {
450 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
451 return match text {
452 "private" => Visibility::Private,
453 "protected" => Visibility::Protected,
454 _ => Visibility::Public,
455 };
456 }
457 }
458 }
459 if let Some(name_node) = node.child_by_field_name("name") {
460 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
461 if name.starts_with('#') {
462 return Visibility::Private;
463 }
464 }
465 }
466 Visibility::Public
467 },
468 Language::Go => {
469 if let Some(name_node) = node.child_by_field_name("name") {
470 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
471 if let Some(first_char) = name.chars().next() {
472 if first_char.is_lowercase() {
473 return Visibility::Private;
474 }
475 }
476 }
477 }
478 Visibility::Public
479 },
480 Language::Java => {
481 for child in node.children(&mut node.walk()) {
482 if child.kind() == "modifiers" {
483 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
484 if text.contains("private") {
485 return Visibility::Private;
486 } else if text.contains("protected") {
487 return Visibility::Protected;
488 } else if text.contains("public") {
489 return Visibility::Public;
490 }
491 }
492 }
493 }
494 Visibility::Internal
495 },
496 Language::C | Language::Cpp => {
497 for child in node.children(&mut node.walk()) {
498 if child.kind() == "storage_class_specifier" {
499 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
500 if text == "static" {
501 return Visibility::Private;
502 }
503 }
504 }
505 }
506 Visibility::Public
507 },
508 Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
509 for child in node.children(&mut node.walk()) {
510 let kind = child.kind();
511 if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
512 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
513 if text.contains("private") {
514 return Visibility::Private;
515 } else if text.contains("protected") {
516 return Visibility::Protected;
517 } else if text.contains("internal") {
518 return Visibility::Internal;
519 } else if text.contains("public") {
520 return Visibility::Public;
521 }
522 }
523 }
524 }
525 Visibility::Internal
526 },
527 Language::Ruby => {
528 if let Some(name_node) = node.child_by_field_name("name") {
529 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
530 if name.starts_with('_') {
531 return Visibility::Private;
532 }
533 }
534 }
535 Visibility::Public
536 },
537 Language::Php => {
538 for child in node.children(&mut node.walk()) {
539 if child.kind() == "visibility_modifier" {
540 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
541 return match text {
542 "private" => Visibility::Private,
543 "protected" => Visibility::Protected,
544 "public" => Visibility::Public,
545 _ => Visibility::Public,
546 };
547 }
548 }
549 }
550 Visibility::Public
551 },
552 Language::Bash => Visibility::Public,
553 Language::Haskell
554 | Language::Elixir
555 | Language::Clojure
556 | Language::OCaml
557 | Language::FSharp
558 | Language::Lua
559 | Language::R => Visibility::Public,
560 }
561}
562
563pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
565 let mut calls = HashSet::new();
566
567 let body_node = find_body_node(node, language);
568 if let Some(body) = body_node {
569 collect_calls_recursive(body, source_code, language, &mut calls);
570 }
571
572 if calls.is_empty() {
573 collect_calls_recursive(node, source_code, language, &mut calls);
574 }
575
576 calls.into_iter().collect()
577}
578
579pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
581 match language {
582 Language::Python => {
583 for child in node.children(&mut node.walk()) {
584 if child.kind() == "block" {
585 return Some(child);
586 }
587 }
588 },
589 Language::Rust => {
590 for child in node.children(&mut node.walk()) {
591 if child.kind() == "block" {
592 return Some(child);
593 }
594 }
595 },
596 Language::JavaScript | Language::TypeScript => {
597 for child in node.children(&mut node.walk()) {
598 let kind = child.kind();
599 if kind == "statement_block" {
600 return Some(child);
601 }
602 if kind == "arrow_function" {
603 if let Some(body) = find_body_node(child, language) {
604 return Some(body);
605 }
606 return Some(child);
607 }
608 }
609 if node.kind() == "arrow_function" {
610 for child in node.children(&mut node.walk()) {
611 let kind = child.kind();
612 if kind != "formal_parameters"
613 && kind != "identifier"
614 && kind != "=>"
615 && kind != "("
616 && kind != ")"
617 && kind != ","
618 {
619 return Some(child);
620 }
621 }
622 return Some(node);
623 }
624 },
625 Language::Go => {
626 for child in node.children(&mut node.walk()) {
627 if child.kind() == "block" {
628 return Some(child);
629 }
630 }
631 },
632 Language::Java => {
633 for child in node.children(&mut node.walk()) {
634 if child.kind() == "block" {
635 return Some(child);
636 }
637 }
638 },
639 Language::C | Language::Cpp => {
640 for child in node.children(&mut node.walk()) {
641 if child.kind() == "compound_statement" {
642 return Some(child);
643 }
644 }
645 },
646 Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
647 for child in node.children(&mut node.walk()) {
648 let kind = child.kind();
649 if kind == "block" || kind == "compound_statement" || kind == "function_body" {
650 return Some(child);
651 }
652 }
653 },
654 Language::Ruby => {
655 for child in node.children(&mut node.walk()) {
656 if child.kind() == "body_statement" || child.kind() == "do_block" {
657 return Some(child);
658 }
659 }
660 },
661 Language::Bash => {
662 for child in node.children(&mut node.walk()) {
663 if child.kind() == "compound_statement" {
664 return Some(child);
665 }
666 }
667 },
668 Language::Haskell
669 | Language::Elixir
670 | Language::Clojure
671 | Language::OCaml
672 | Language::FSharp
673 | Language::R => {
674 return Some(node);
675 },
676 Language::Lua => {
677 for child in node.children(&mut node.walk()) {
678 if child.kind() == "block" {
679 return Some(child);
680 }
681 }
682 },
683 }
684 None
685}
686
687pub fn collect_calls_recursive(
689 node: Node<'_>,
690 source_code: &str,
691 language: Language,
692 calls: &mut HashSet<String>,
693) {
694 let kind = node.kind();
695
696 let call_name = match language {
697 Language::Python => {
698 if kind == "call" {
699 node.child_by_field_name("function").and_then(|f| {
700 if f.kind() == "identifier" {
701 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
702 } else if f.kind() == "attribute" {
703 f.child_by_field_name("attribute")
704 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
705 .map(String::from)
706 } else {
707 None
708 }
709 })
710 } else {
711 None
712 }
713 },
714 Language::Rust => {
715 if kind == "call_expression" {
716 node.child_by_field_name("function").and_then(|f| {
717 if f.kind() == "identifier" {
718 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
719 } else if f.kind() == "field_expression" {
720 f.child_by_field_name("field")
721 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
722 .map(String::from)
723 } else if f.kind() == "scoped_identifier" {
724 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
725 } else {
726 None
727 }
728 })
729 } else if kind == "macro_invocation" {
730 node.child_by_field_name("macro")
731 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
732 .map(|s| format!("{}!", s))
733 } else {
734 None
735 }
736 },
737 Language::JavaScript | Language::TypeScript => {
738 if kind == "call_expression" {
739 node.child_by_field_name("function").and_then(|f| {
740 if f.kind() == "identifier" {
741 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
742 } else if f.kind() == "member_expression" {
743 f.child_by_field_name("property")
744 .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
745 .map(String::from)
746 } else {
747 None
748 }
749 })
750 } else {
751 None
752 }
753 },
754 Language::Go => {
755 if kind == "call_expression" {
756 node.child_by_field_name("function").and_then(|f| {
757 if f.kind() == "identifier" {
758 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
759 } else if f.kind() == "selector_expression" {
760 f.child_by_field_name("field")
761 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
762 .map(String::from)
763 } else {
764 None
765 }
766 })
767 } else {
768 None
769 }
770 },
771 Language::Java => {
772 if kind == "method_invocation" {
773 node.child_by_field_name("name")
774 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
775 .map(String::from)
776 } else {
777 None
778 }
779 },
780 Language::C | Language::Cpp => {
781 if kind == "call_expression" {
782 node.child_by_field_name("function").and_then(|f| {
783 if f.kind() == "identifier" {
784 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
785 } else if f.kind() == "field_expression" {
786 f.child_by_field_name("field")
787 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
788 .map(String::from)
789 } else {
790 None
791 }
792 })
793 } else {
794 None
795 }
796 },
797 Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
798 if kind == "invocation_expression" || kind == "call_expression" {
799 node.children(&mut node.walk())
800 .find(|child| child.kind() == "identifier" || child.kind() == "simple_name")
801 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
802 .map(|s| s.to_owned())
803 } else {
804 None
805 }
806 },
807 Language::Ruby => {
808 if kind == "call" || kind == "method_call" {
809 node.child_by_field_name("method")
810 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
811 .map(String::from)
812 } else {
813 None
814 }
815 },
816 Language::Bash => {
817 if kind == "command" {
818 node.child_by_field_name("name")
819 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
820 .map(String::from)
821 } else {
822 None
823 }
824 },
825 Language::Haskell
826 | Language::Elixir
827 | Language::Clojure
828 | Language::OCaml
829 | Language::FSharp
830 | Language::Lua
831 | Language::R => {
832 if kind == "function_call" || kind == "call" || kind == "application" {
833 node.children(&mut node.walk())
834 .find(|child| child.kind() == "identifier" || child.kind() == "variable")
835 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
836 .map(|s| s.to_owned())
837 } else {
838 None
839 }
840 },
841 };
842
843 if let Some(name) = call_name {
844 if !is_builtin(&name, language) {
845 calls.insert(name);
846 }
847 }
848
849 for child in node.children(&mut node.walk()) {
850 collect_calls_recursive(child, source_code, language, calls);
851 }
852}
853
854pub fn is_builtin(name: &str, language: Language) -> bool {
856 match language {
857 Language::Python => {
858 matches!(
859 name,
860 "print"
861 | "len"
862 | "range"
863 | "str"
864 | "int"
865 | "float"
866 | "list"
867 | "dict"
868 | "set"
869 | "tuple"
870 | "bool"
871 | "type"
872 | "isinstance"
873 | "hasattr"
874 | "getattr"
875 | "setattr"
876 | "super"
877 | "iter"
878 | "next"
879 | "open"
880 | "input"
881 | "format"
882 | "enumerate"
883 | "zip"
884 | "map"
885 | "filter"
886 | "sorted"
887 | "reversed"
888 | "sum"
889 | "min"
890 | "max"
891 | "abs"
892 | "round"
893 | "ord"
894 | "chr"
895 | "hex"
896 | "bin"
897 | "oct"
898 )
899 },
900 Language::JavaScript | Language::TypeScript => {
901 matches!(
902 name,
903 "console"
904 | "log"
905 | "error"
906 | "warn"
907 | "parseInt"
908 | "parseFloat"
909 | "setTimeout"
910 | "setInterval"
911 | "clearTimeout"
912 | "clearInterval"
913 | "JSON"
914 | "stringify"
915 | "parse"
916 | "toString"
917 | "valueOf"
918 | "push"
919 | "pop"
920 | "shift"
921 | "unshift"
922 | "slice"
923 | "splice"
924 | "map"
925 | "filter"
926 | "reduce"
927 | "forEach"
928 | "find"
929 | "findIndex"
930 | "includes"
931 | "indexOf"
932 | "join"
933 | "split"
934 | "replace"
935 )
936 },
937 Language::Rust => {
938 matches!(
939 name,
940 "println!"
941 | "print!"
942 | "eprintln!"
943 | "eprint!"
944 | "format!"
945 | "vec!"
946 | "panic!"
947 | "assert!"
948 | "assert_eq!"
949 | "assert_ne!"
950 | "debug!"
951 | "info!"
952 | "warn!"
953 | "error!"
954 | "trace!"
955 | "unwrap"
956 | "expect"
957 | "ok"
958 | "err"
959 | "some"
960 | "none"
961 | "clone"
962 | "to_string"
963 | "into"
964 | "from"
965 | "default"
966 | "iter"
967 | "into_iter"
968 | "collect"
969 | "map"
970 | "filter"
971 )
972 },
973 Language::Go => {
974 matches!(
975 name,
976 "fmt"
977 | "Println"
978 | "Printf"
979 | "Sprintf"
980 | "Errorf"
981 | "make"
982 | "new"
983 | "len"
984 | "cap"
985 | "append"
986 | "copy"
987 | "delete"
988 | "close"
989 | "panic"
990 | "recover"
991 | "print"
992 )
993 },
994 Language::Java => {
995 matches!(
996 name,
997 "println"
998 | "print"
999 | "printf"
1000 | "toString"
1001 | "equals"
1002 | "hashCode"
1003 | "getClass"
1004 | "clone"
1005 | "notify"
1006 | "wait"
1007 | "get"
1008 | "set"
1009 | "add"
1010 | "remove"
1011 | "size"
1012 | "isEmpty"
1013 | "contains"
1014 | "iterator"
1015 | "valueOf"
1016 | "parseInt"
1017 )
1018 },
1019 Language::C | Language::Cpp => {
1020 matches!(
1021 name,
1022 "printf"
1023 | "scanf"
1024 | "malloc"
1025 | "free"
1026 | "memcpy"
1027 | "memset"
1028 | "strlen"
1029 | "strcpy"
1030 | "strcmp"
1031 | "strcat"
1032 | "sizeof"
1033 | "cout"
1034 | "cin"
1035 | "endl"
1036 | "cerr"
1037 | "clog"
1038 )
1039 },
1040 Language::CSharp => {
1041 matches!(
1042 name,
1043 "WriteLine"
1044 | "Write"
1045 | "ReadLine"
1046 | "ToString"
1047 | "Equals"
1048 | "GetHashCode"
1049 | "GetType"
1050 | "Add"
1051 | "Remove"
1052 | "Contains"
1053 | "Count"
1054 | "Clear"
1055 | "ToList"
1056 | "ToArray"
1057 )
1058 },
1059 Language::Ruby => {
1060 matches!(
1061 name,
1062 "puts"
1063 | "print"
1064 | "p"
1065 | "gets"
1066 | "each"
1067 | "map"
1068 | "select"
1069 | "reject"
1070 | "reduce"
1071 | "inject"
1072 | "find"
1073 | "any?"
1074 | "all?"
1075 | "include?"
1076 | "empty?"
1077 | "nil?"
1078 | "length"
1079 | "size"
1080 )
1081 },
1082 Language::Php => {
1083 matches!(
1084 name,
1085 "echo"
1086 | "print"
1087 | "var_dump"
1088 | "print_r"
1089 | "isset"
1090 | "empty"
1091 | "array"
1092 | "count"
1093 | "strlen"
1094 | "strpos"
1095 | "substr"
1096 | "explode"
1097 | "implode"
1098 | "json_encode"
1099 | "json_decode"
1100 )
1101 },
1102 Language::Kotlin => {
1103 matches!(
1104 name,
1105 "println"
1106 | "print"
1107 | "readLine"
1108 | "toString"
1109 | "equals"
1110 | "hashCode"
1111 | "map"
1112 | "filter"
1113 | "forEach"
1114 | "let"
1115 | "also"
1116 | "apply"
1117 | "run"
1118 | "with"
1119 | "listOf"
1120 | "mapOf"
1121 | "setOf"
1122 )
1123 },
1124 Language::Swift => {
1125 matches!(
1126 name,
1127 "print"
1128 | "debugPrint"
1129 | "dump"
1130 | "map"
1131 | "filter"
1132 | "reduce"
1133 | "forEach"
1134 | "contains"
1135 | "count"
1136 | "isEmpty"
1137 | "append"
1138 )
1139 },
1140 Language::Scala => {
1141 matches!(
1142 name,
1143 "println"
1144 | "print"
1145 | "map"
1146 | "filter"
1147 | "flatMap"
1148 | "foreach"
1149 | "reduce"
1150 | "fold"
1151 | "foldLeft"
1152 | "foldRight"
1153 | "collect"
1154 )
1155 },
1156 Language::Bash
1157 | Language::Haskell
1158 | Language::Elixir
1159 | Language::Clojure
1160 | Language::OCaml
1161 | Language::FSharp
1162 | Language::Lua
1163 | Language::R => false,
1164 }
1165}
1166
/// Strips block-comment decoration from a JSDoc-style comment.
///
/// Each line is trimmed and loses its closing `*/`, its opening `/**` or `/*`, and
/// any leading `*` gutter. Lines left empty are dropped and the rest are joined
/// with single spaces.
///
/// The closing marker is removed before the leading asterisks. Doing it the other
/// way round turns a bare ` */` line into `/`, which then leaks into the result.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1182
1183pub fn clean_javadoc(text: &str) -> String {
1185 clean_jsdoc(text)
1186}
1187
1188pub fn extract_inheritance(
1190 node: Node<'_>,
1191 source_code: &str,
1192 language: Language,
1193) -> (Option<String>, Vec<String>) {
1194 let mut extends = None;
1195 let mut implements = Vec::new();
1196
1197 match language {
1198 Language::Python => {
1199 if node.kind() == "class_definition" {
1201 if let Some(args) = node.child_by_field_name("superclasses") {
1202 for child in args.children(&mut args.walk()) {
1203 if child.kind() == "identifier" || child.kind() == "attribute" {
1204 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1205 if extends.is_none() {
1206 extends = Some(name.to_owned());
1207 } else {
1208 implements.push(name.to_owned());
1209 }
1210 }
1211 }
1212 }
1213 }
1214 }
1215 },
1216 Language::JavaScript | Language::TypeScript => {
1217 if node.kind() == "class_declaration" || node.kind() == "class" {
1219 for child in node.children(&mut node.walk()) {
1220 if child.kind() == "class_heritage" {
1221 for heritage in child.children(&mut child.walk()) {
1222 if heritage.kind() == "extends_clause" {
1223 for type_node in heritage.children(&mut heritage.walk()) {
1224 if type_node.kind() == "identifier"
1225 || type_node.kind() == "type_identifier"
1226 {
1227 if let Ok(name) =
1228 type_node.utf8_text(source_code.as_bytes())
1229 {
1230 extends = Some(name.to_owned());
1231 }
1232 }
1233 }
1234 } else if heritage.kind() == "implements_clause" {
1235 for type_node in heritage.children(&mut heritage.walk()) {
1236 if type_node.kind() == "identifier"
1237 || type_node.kind() == "type_identifier"
1238 {
1239 if let Ok(name) =
1240 type_node.utf8_text(source_code.as_bytes())
1241 {
1242 implements.push(name.to_owned());
1243 }
1244 }
1245 }
1246 }
1247 }
1248 }
1249 }
1250 }
1251 },
1252 Language::Rust => {
1253 if node.kind() == "impl_item" {
1256 let mut has_for = false;
1257 for child in node.children(&mut node.walk()) {
1258 if child.kind() == "for" {
1259 has_for = true;
1260 }
1261 if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1262 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1263 if has_for {
1264 } else {
1266 implements.push(name.to_owned());
1268 }
1269 }
1270 }
1271 }
1272 }
1273 },
1274 Language::Go => {
1275 if node.kind() == "type_declaration" {
1277 for child in node.children(&mut node.walk()) {
1278 if child.kind() == "type_spec" {
1279 for spec_child in child.children(&mut child.walk()) {
1280 if spec_child.kind() == "struct_type" {
1281 for field in spec_child.children(&mut spec_child.walk()) {
1282 if field.kind() == "field_declaration" {
1283 let has_name = field.child_by_field_name("name").is_some();
1285 if !has_name {
1286 if let Some(type_node) =
1287 field.child_by_field_name("type")
1288 {
1289 if let Ok(name) =
1290 type_node.utf8_text(source_code.as_bytes())
1291 {
1292 implements.push(name.to_owned());
1293 }
1294 }
1295 }
1296 }
1297 }
1298 }
1299 }
1300 }
1301 }
1302 }
1303 },
1304 Language::Java => {
1305 if node.kind() == "class_declaration" {
1307 for child in node.children(&mut node.walk()) {
1308 if child.kind() == "superclass" {
1309 for type_node in child.children(&mut child.walk()) {
1310 if type_node.kind() == "type_identifier" {
1311 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1312 extends = Some(name.to_owned());
1313 }
1314 }
1315 }
1316 } else if child.kind() == "super_interfaces" {
1317 for type_list in child.children(&mut child.walk()) {
1318 if type_list.kind() == "type_list" {
1319 for type_node in type_list.children(&mut type_list.walk()) {
1320 if type_node.kind() == "type_identifier" {
1321 if let Ok(name) =
1322 type_node.utf8_text(source_code.as_bytes())
1323 {
1324 implements.push(name.to_owned());
1325 }
1326 }
1327 }
1328 }
1329 }
1330 }
1331 }
1332 }
1333 },
1334 Language::C | Language::Cpp => {
1335 if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1337 for child in node.children(&mut node.walk()) {
1338 if child.kind() == "base_class_clause" {
1339 for base in child.children(&mut child.walk()) {
1340 if base.kind() == "type_identifier" {
1341 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1342 if extends.is_none() {
1343 extends = Some(name.to_owned());
1344 } else {
1345 implements.push(name.to_owned());
1346 }
1347 }
1348 }
1349 }
1350 }
1351 }
1352 }
1353 },
1354 Language::CSharp => {
1355 if node.kind() == "class_declaration" {
1357 for child in node.children(&mut node.walk()) {
1358 if child.kind() == "base_list" {
1359 for base in child.children(&mut child.walk()) {
1360 if base.kind() == "identifier" || base.kind() == "generic_name" {
1361 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1362 if name.starts_with('I') && name.len() > 1 {
1363 implements.push(name.to_owned());
1365 } else if extends.is_none() {
1366 extends = Some(name.to_owned());
1367 } else {
1368 implements.push(name.to_owned());
1369 }
1370 }
1371 }
1372 }
1373 }
1374 }
1375 }
1376 },
1377 Language::Ruby => {
1378 if node.kind() == "class" {
1380 for child in node.children(&mut node.walk()) {
1381 if child.kind() == "superclass" {
1382 for type_node in child.children(&mut child.walk()) {
1383 if type_node.kind() == "constant" {
1384 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1385 extends = Some(name.to_owned());
1386 }
1387 }
1388 }
1389 }
1390 }
1391 }
1392 },
1393 Language::Php => {
1394 if node.kind() == "class_declaration" {
1396 for child in node.children(&mut node.walk()) {
1397 if child.kind() == "base_clause" {
1398 for type_node in child.children(&mut child.walk()) {
1399 if type_node.kind() == "name" {
1400 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1401 extends = Some(name.to_owned());
1402 }
1403 }
1404 }
1405 } else if child.kind() == "class_interface_clause" {
1406 for type_node in child.children(&mut child.walk()) {
1407 if type_node.kind() == "name" {
1408 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1409 implements.push(name.to_owned());
1410 }
1411 }
1412 }
1413 }
1414 }
1415 }
1416 },
1417 Language::Kotlin => {
1418 if node.kind() == "class_declaration" {
1420 for child in node.children(&mut node.walk()) {
1421 if child.kind() == "delegation_specifiers" {
1422 for spec in child.children(&mut child.walk()) {
1423 if spec.kind() == "delegation_specifier" {
1424 for type_node in spec.children(&mut spec.walk()) {
1425 if type_node.kind() == "user_type" {
1426 if let Ok(name) =
1427 type_node.utf8_text(source_code.as_bytes())
1428 {
1429 if extends.is_none() {
1430 extends = Some(name.to_owned());
1431 } else {
1432 implements.push(name.to_owned());
1433 }
1434 }
1435 }
1436 }
1437 }
1438 }
1439 }
1440 }
1441 }
1442 },
1443 Language::Swift => {
1444 if node.kind() == "class_declaration" {
1446 for child in node.children(&mut node.walk()) {
1447 if child.kind() == "type_inheritance_clause" {
1448 for type_node in child.children(&mut child.walk()) {
1449 if type_node.kind() == "type_identifier" {
1450 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1451 if extends.is_none() {
1452 extends = Some(name.to_owned());
1453 } else {
1454 implements.push(name.to_owned());
1455 }
1456 }
1457 }
1458 }
1459 }
1460 }
1461 }
1462 },
1463 Language::Scala => {
1464 if node.kind() == "class_definition" {
1466 for child in node.children(&mut node.walk()) {
1467 if child.kind() == "extends_clause" {
1468 for type_node in child.children(&mut child.walk()) {
1469 if type_node.kind() == "type_identifier" {
1470 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1471 if extends.is_none() {
1472 extends = Some(name.to_owned());
1473 } else {
1474 implements.push(name.to_owned());
1475 }
1476 }
1477 }
1478 }
1479 }
1480 }
1481 }
1482 },
1483 Language::Bash
1484 | Language::Haskell
1485 | Language::Elixir
1486 | Language::Clojure
1487 | Language::OCaml
1488 | Language::FSharp
1489 | Language::Lua
1490 | Language::R => {},
1491 }
1492
1493 (extends, implements)
1494}
1495
1496pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1498 match capture_name {
1499 "function" => SymbolKind::Function,
1500 "class" => SymbolKind::Class,
1501 "method" => SymbolKind::Method,
1502 "struct" => SymbolKind::Struct,
1503 "enum" => SymbolKind::Enum,
1504 "interface" => SymbolKind::Interface,
1505 "trait" => SymbolKind::Trait,
1506 _ => SymbolKind::Function,
1507 }
1508}