infiniloom_engine/parser/
extraction.rs1use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
15pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
17 let sig_node = match language {
18 Language::Python => {
19 if node.kind() == "function_definition" {
20 let start = node.start_byte();
21 let mut end = start;
22 for byte in &source_code.as_bytes()[start..] {
23 end += 1;
24 if *byte == b':' || *byte == b'\n' {
25 break;
26 }
27 }
28 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
29 }
30 None
31 },
32 Language::JavaScript | Language::TypeScript => {
33 if node.kind().contains("function") || node.kind().contains("method") {
34 let start = node.start_byte();
35 let mut end = start;
36 let mut brace_count = 0;
37 for byte in &source_code.as_bytes()[start..] {
38 if *byte == b'{' {
39 brace_count += 1;
40 if brace_count == 1 {
41 break;
42 }
43 }
44 end += 1;
45 }
46 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
47 }
48 None
49 },
50 Language::Rust => {
51 if node.kind() == "function_item" {
52 for child in node.children(&mut node.walk()) {
53 if child.kind() == "block" {
54 let start = node.start_byte();
55 let end = child.start_byte();
56 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
57 }
58 }
59 }
60 None
61 },
62 Language::Go => {
63 if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
64 for child in node.children(&mut node.walk()) {
65 if child.kind() == "block" {
66 let start = node.start_byte();
67 let end = child.start_byte();
68 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
69 }
70 }
71 }
72 None
73 },
74 Language::Java => {
75 if node.kind() == "method_declaration" {
76 for child in node.children(&mut node.walk()) {
77 if child.kind() == "block" {
78 let start = node.start_byte();
79 let end = child.start_byte();
80 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
81 }
82 }
83 }
84 None
85 },
86 Language::C
87 | Language::Cpp
88 | Language::CSharp
89 | Language::Php
90 | Language::Kotlin
91 | Language::Swift
92 | Language::Scala => {
93 for child in node.children(&mut node.walk()) {
94 if child.kind() == "block"
95 || child.kind() == "compound_statement"
96 || child.kind() == "function_body"
97 {
98 let start = node.start_byte();
99 let end = child.start_byte();
100 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
101 }
102 }
103 None
104 },
105 Language::Ruby | Language::Lua => {
106 let start = node.start_byte();
107 let mut end = start;
108 for byte in &source_code.as_bytes()[start..] {
109 end += 1;
110 if *byte == b'\n' {
111 break;
112 }
113 }
114 Some(source_code[start..end].trim().to_owned())
115 },
116 Language::Bash => {
117 let start = node.start_byte();
118 let mut end = start;
119 for byte in &source_code.as_bytes()[start..] {
120 if *byte == b'{' {
121 break;
122 }
123 end += 1;
124 }
125 Some(source_code[start..end].trim().to_owned())
126 },
127 Language::Haskell
128 | Language::OCaml
129 | Language::FSharp
130 | Language::Elixir
131 | Language::Clojure
132 | Language::R => {
133 let start = node.start_byte();
134 let mut end = start;
135 for byte in &source_code.as_bytes()[start..] {
136 end += 1;
137 if *byte == b'\n' || *byte == b'=' {
138 break;
139 }
140 }
141 Some(source_code[start..end].trim().to_owned())
142 },
143 };
144
145 sig_node.or_else(|| {
146 let start = node.start_byte();
147 let end = std::cmp::min(start + 200, source_code.len());
148 let text = &source_code[start..end];
149 text.lines().next().map(|s| s.trim().to_owned())
150 })
151}
152
153pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
155 match language {
156 Language::Python => {
157 let mut cursor = node.walk();
158 for child in node.children(&mut cursor) {
159 if child.kind() == "block" {
160 for stmt in child.children(&mut child.walk()) {
161 if stmt.kind() == "expression_statement" {
162 for expr in stmt.children(&mut stmt.walk()) {
163 if expr.kind() == "string" {
164 if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
165 return Some(
166 text.trim_matches(|c| c == '"' || c == '\'')
167 .trim()
168 .to_owned(),
169 );
170 }
171 }
172 }
173 }
174 }
175 }
176 }
177 None
178 },
179 Language::JavaScript | Language::TypeScript => {
180 if let Some(prev_sibling) = node.prev_sibling() {
181 if prev_sibling.kind() == "comment" {
182 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
183 if text.starts_with("/**") {
184 return Some(clean_jsdoc(text));
185 }
186 }
187 }
188 }
189 None
190 },
191 Language::Rust => {
192 let start_byte = node.start_byte();
193 let lines_before: Vec<_> = source_code[..start_byte]
194 .lines()
195 .rev()
196 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
197 .collect();
198
199 if !lines_before.is_empty() {
200 let doc: Vec<String> = lines_before
201 .into_iter()
202 .rev()
203 .filter_map(|line| {
204 let trimmed = line.trim();
205 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
206 })
207 .collect();
208
209 if !doc.is_empty() {
210 return Some(doc.join(" "));
211 }
212 }
213 None
214 },
215 Language::Go => {
216 if let Some(prev_sibling) = node.prev_sibling() {
217 if prev_sibling.kind() == "comment" {
218 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
219 return Some(text.trim_start_matches("//").trim().to_owned());
220 }
221 }
222 }
223 None
224 },
225 Language::Java => {
226 if let Some(prev_sibling) = node.prev_sibling() {
227 if prev_sibling.kind() == "block_comment" {
228 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
229 if text.starts_with("/**") {
230 return Some(clean_javadoc(text));
231 }
232 }
233 }
234 }
235 None
236 },
237 Language::C | Language::Cpp => {
238 if let Some(prev_sibling) = node.prev_sibling() {
239 if prev_sibling.kind() == "comment" {
240 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
241 if text.starts_with("/**") || text.starts_with("/*") {
242 return Some(clean_jsdoc(text));
243 }
244 return Some(text.trim_start_matches("//").trim().to_owned());
245 }
246 }
247 }
248 None
249 },
250 Language::CSharp => {
251 let start_byte = node.start_byte();
252 let lines_before: Vec<_> = source_code[..start_byte]
253 .lines()
254 .rev()
255 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
256 .collect();
257
258 if !lines_before.is_empty() {
259 let doc: Vec<String> = lines_before
260 .into_iter()
261 .rev()
262 .filter_map(|line| {
263 let trimmed = line.trim();
264 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
265 })
266 .collect();
267
268 if !doc.is_empty() {
269 return Some(doc.join(" "));
270 }
271 }
272 None
273 },
274 Language::Ruby => {
275 if let Some(prev_sibling) = node.prev_sibling() {
276 if prev_sibling.kind() == "comment" {
277 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
278 return Some(text.trim_start_matches('#').trim().to_owned());
279 }
280 }
281 }
282 None
283 },
284 Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
285 if let Some(prev_sibling) = node.prev_sibling() {
286 let kind = prev_sibling.kind();
287 if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
288 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
289 if text.starts_with("/**") {
290 return Some(clean_jsdoc(text));
291 }
292 }
293 }
294 }
295 None
296 },
297 Language::Bash => {
298 if let Some(prev_sibling) = node.prev_sibling() {
299 if prev_sibling.kind() == "comment" {
300 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
301 return Some(text.trim_start_matches('#').trim().to_owned());
302 }
303 }
304 }
305 None
306 },
307 Language::Haskell => {
308 if let Some(prev_sibling) = node.prev_sibling() {
309 if prev_sibling.kind() == "comment" {
310 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
311 let cleaned = text
312 .trim_start_matches("{-")
313 .trim_end_matches("-}")
314 .trim_start_matches("--")
315 .trim();
316 return Some(cleaned.to_owned());
317 }
318 }
319 }
320 None
321 },
322 Language::Elixir => {
323 if let Some(prev_sibling) = node.prev_sibling() {
324 if prev_sibling.kind() == "comment" {
325 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
326 return Some(text.trim_start_matches('#').trim().to_owned());
327 }
328 }
329 }
330 None
331 },
332 Language::Clojure => None,
333 Language::OCaml | Language::FSharp => {
334 if let Some(prev_sibling) = node.prev_sibling() {
335 if prev_sibling.kind() == "comment" {
336 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
337 let cleaned = text
338 .trim_start_matches("(**")
339 .trim_start_matches("(*")
340 .trim_end_matches("*)")
341 .trim();
342 return Some(cleaned.to_owned());
343 }
344 }
345 }
346 None
347 },
348 Language::Lua => {
349 if let Some(prev_sibling) = node.prev_sibling() {
350 if prev_sibling.kind() == "comment" {
351 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
352 let cleaned = text
353 .trim_start_matches("--[[")
354 .trim_end_matches("]]")
355 .trim_start_matches("--")
356 .trim();
357 return Some(cleaned.to_owned());
358 }
359 }
360 }
361 None
362 },
363 Language::R => {
364 if let Some(prev_sibling) = node.prev_sibling() {
365 if prev_sibling.kind() == "comment" {
366 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
367 return Some(text.trim_start_matches('#').trim().to_owned());
368 }
369 }
370 }
371 None
372 },
373 }
374}
375
376pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
378 let mut current = node.parent()?;
379
380 while let Some(parent) = current.parent() {
381 if ["class_definition", "class_declaration", "struct_item", "impl_item"]
382 .contains(&parent.kind())
383 {
384 for child in parent.children(&mut parent.walk()) {
385 if child.kind() == "identifier" || child.kind() == "type_identifier" {
386 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
387 return Some(name.to_owned());
388 }
389 }
390 }
391 }
392 current = parent;
393 }
394
395 None
396}
397
398pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
400 match language {
401 Language::Python => {
402 if let Some(name_node) = node.child_by_field_name("name") {
403 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
404 if name.starts_with("__") && !name.ends_with("__") {
405 return Visibility::Private;
406 } else if name.starts_with('_') {
407 return Visibility::Protected;
408 }
409 }
410 }
411 Visibility::Public
412 },
413 Language::Rust => {
414 for child in node.children(&mut node.walk()) {
415 if child.kind() == "visibility_modifier" {
416 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
417 if text.contains("pub(crate)") || text.contains("pub(super)") {
418 return Visibility::Internal;
419 } else if text.starts_with("pub") {
420 return Visibility::Public;
421 }
422 }
423 }
424 }
425 Visibility::Private
426 },
427 Language::JavaScript | Language::TypeScript => {
428 for child in node.children(&mut node.walk()) {
429 let kind = child.kind();
430 if kind == "private" || kind == "accessibility_modifier" {
431 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
432 return match text {
433 "private" => Visibility::Private,
434 "protected" => Visibility::Protected,
435 _ => Visibility::Public,
436 };
437 }
438 }
439 }
440 if let Some(name_node) = node.child_by_field_name("name") {
441 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
442 if name.starts_with('#') {
443 return Visibility::Private;
444 }
445 }
446 }
447 Visibility::Public
448 },
449 Language::Go => {
450 if let Some(name_node) = node.child_by_field_name("name") {
451 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
452 if let Some(first_char) = name.chars().next() {
453 if first_char.is_lowercase() {
454 return Visibility::Private;
455 }
456 }
457 }
458 }
459 Visibility::Public
460 },
461 Language::Java => {
462 for child in node.children(&mut node.walk()) {
463 if child.kind() == "modifiers" {
464 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
465 if text.contains("private") {
466 return Visibility::Private;
467 } else if text.contains("protected") {
468 return Visibility::Protected;
469 } else if text.contains("public") {
470 return Visibility::Public;
471 }
472 }
473 }
474 }
475 Visibility::Internal
476 },
477 Language::C | Language::Cpp => {
478 for child in node.children(&mut node.walk()) {
479 if child.kind() == "storage_class_specifier" {
480 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
481 if text == "static" {
482 return Visibility::Private;
483 }
484 }
485 }
486 }
487 Visibility::Public
488 },
489 Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
490 for child in node.children(&mut node.walk()) {
491 let kind = child.kind();
492 if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
493 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
494 if text.contains("private") {
495 return Visibility::Private;
496 } else if text.contains("protected") {
497 return Visibility::Protected;
498 } else if text.contains("internal") {
499 return Visibility::Internal;
500 } else if text.contains("public") {
501 return Visibility::Public;
502 }
503 }
504 }
505 }
506 Visibility::Internal
507 },
508 Language::Ruby => {
509 if let Some(name_node) = node.child_by_field_name("name") {
510 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
511 if name.starts_with('_') {
512 return Visibility::Private;
513 }
514 }
515 }
516 Visibility::Public
517 },
518 Language::Php => {
519 for child in node.children(&mut node.walk()) {
520 if child.kind() == "visibility_modifier" {
521 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
522 return match text {
523 "private" => Visibility::Private,
524 "protected" => Visibility::Protected,
525 "public" => Visibility::Public,
526 _ => Visibility::Public,
527 };
528 }
529 }
530 }
531 Visibility::Public
532 },
533 Language::Bash => Visibility::Public,
534 Language::Haskell
535 | Language::Elixir
536 | Language::Clojure
537 | Language::OCaml
538 | Language::FSharp
539 | Language::Lua
540 | Language::R => Visibility::Public,
541 }
542}
543
544pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
546 let mut calls = HashSet::new();
547
548 let body_node = find_body_node(node, language);
549 if let Some(body) = body_node {
550 collect_calls_recursive(body, source_code, language, &mut calls);
551 }
552
553 if calls.is_empty() {
554 collect_calls_recursive(node, source_code, language, &mut calls);
555 }
556
557 calls.into_iter().collect()
558}
559
560pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
562 match language {
563 Language::Python => {
564 for child in node.children(&mut node.walk()) {
565 if child.kind() == "block" {
566 return Some(child);
567 }
568 }
569 },
570 Language::Rust => {
571 for child in node.children(&mut node.walk()) {
572 if child.kind() == "block" {
573 return Some(child);
574 }
575 }
576 },
577 Language::JavaScript | Language::TypeScript => {
578 for child in node.children(&mut node.walk()) {
579 let kind = child.kind();
580 if kind == "statement_block" {
581 return Some(child);
582 }
583 if kind == "arrow_function" {
584 if let Some(body) = find_body_node(child, language) {
585 return Some(body);
586 }
587 return Some(child);
588 }
589 }
590 if node.kind() == "arrow_function" {
591 for child in node.children(&mut node.walk()) {
592 let kind = child.kind();
593 if kind != "formal_parameters"
594 && kind != "identifier"
595 && kind != "=>"
596 && kind != "("
597 && kind != ")"
598 && kind != ","
599 {
600 return Some(child);
601 }
602 }
603 return Some(node);
604 }
605 },
606 Language::Go => {
607 for child in node.children(&mut node.walk()) {
608 if child.kind() == "block" {
609 return Some(child);
610 }
611 }
612 },
613 Language::Java => {
614 for child in node.children(&mut node.walk()) {
615 if child.kind() == "block" {
616 return Some(child);
617 }
618 }
619 },
620 Language::C | Language::Cpp => {
621 for child in node.children(&mut node.walk()) {
622 if child.kind() == "compound_statement" {
623 return Some(child);
624 }
625 }
626 },
627 Language::CSharp
628 | Language::Php
629 | Language::Kotlin
630 | Language::Swift
631 | Language::Scala => {
632 for child in node.children(&mut node.walk()) {
633 let kind = child.kind();
634 if kind == "block" || kind == "compound_statement" || kind == "function_body" {
635 return Some(child);
636 }
637 }
638 },
639 Language::Ruby => {
640 for child in node.children(&mut node.walk()) {
641 if child.kind() == "body_statement" || child.kind() == "do_block" {
642 return Some(child);
643 }
644 }
645 },
646 Language::Bash => {
647 for child in node.children(&mut node.walk()) {
648 if child.kind() == "compound_statement" {
649 return Some(child);
650 }
651 }
652 },
653 Language::Haskell
654 | Language::Elixir
655 | Language::Clojure
656 | Language::OCaml
657 | Language::FSharp
658 | Language::R => {
659 return Some(node);
660 },
661 Language::Lua => {
662 for child in node.children(&mut node.walk()) {
663 if child.kind() == "block" {
664 return Some(child);
665 }
666 }
667 },
668 }
669 None
670}
671
672pub fn collect_calls_recursive(
674 node: Node<'_>,
675 source_code: &str,
676 language: Language,
677 calls: &mut HashSet<String>,
678) {
679 let kind = node.kind();
680
681 let call_name = match language {
682 Language::Python => {
683 if kind == "call" {
684 node.child_by_field_name("function").and_then(|f| {
685 if f.kind() == "identifier" {
686 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
687 } else if f.kind() == "attribute" {
688 f.child_by_field_name("attribute")
689 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
690 .map(String::from)
691 } else {
692 None
693 }
694 })
695 } else {
696 None
697 }
698 },
699 Language::Rust => {
700 if kind == "call_expression" {
701 node.child_by_field_name("function").and_then(|f| {
702 if f.kind() == "identifier" {
703 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
704 } else if f.kind() == "field_expression" {
705 f.child_by_field_name("field")
706 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
707 .map(String::from)
708 } else if f.kind() == "scoped_identifier" {
709 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
710 } else {
711 None
712 }
713 })
714 } else if kind == "macro_invocation" {
715 node.child_by_field_name("macro")
716 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
717 .map(|s| format!("{}!", s))
718 } else {
719 None
720 }
721 },
722 Language::JavaScript | Language::TypeScript => {
723 if kind == "call_expression" {
724 node.child_by_field_name("function").and_then(|f| {
725 if f.kind() == "identifier" {
726 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
727 } else if f.kind() == "member_expression" {
728 f.child_by_field_name("property")
729 .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
730 .map(String::from)
731 } else {
732 None
733 }
734 })
735 } else {
736 None
737 }
738 },
739 Language::Go => {
740 if kind == "call_expression" {
741 node.child_by_field_name("function").and_then(|f| {
742 if f.kind() == "identifier" {
743 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
744 } else if f.kind() == "selector_expression" {
745 f.child_by_field_name("field")
746 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
747 .map(String::from)
748 } else {
749 None
750 }
751 })
752 } else {
753 None
754 }
755 },
756 Language::Java => {
757 if kind == "method_invocation" {
758 node.child_by_field_name("name")
759 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
760 .map(String::from)
761 } else {
762 None
763 }
764 },
765 Language::C | Language::Cpp => {
766 if kind == "call_expression" {
767 node.child_by_field_name("function").and_then(|f| {
768 if f.kind() == "identifier" {
769 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
770 } else if f.kind() == "field_expression" {
771 f.child_by_field_name("field")
772 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
773 .map(String::from)
774 } else {
775 None
776 }
777 })
778 } else {
779 None
780 }
781 },
782 Language::CSharp
783 | Language::Php
784 | Language::Kotlin
785 | Language::Swift
786 | Language::Scala => {
787 if kind == "invocation_expression" || kind == "call_expression" {
788 node.children(&mut node.walk())
789 .find(|child| {
790 child.kind() == "identifier" || child.kind() == "simple_name"
791 })
792 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
793 .map(|s| s.to_owned())
794 } else {
795 None
796 }
797 },
798 Language::Ruby => {
799 if kind == "call" || kind == "method_call" {
800 node.child_by_field_name("method")
801 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
802 .map(String::from)
803 } else {
804 None
805 }
806 },
807 Language::Bash => {
808 if kind == "command" {
809 node.child_by_field_name("name")
810 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
811 .map(String::from)
812 } else {
813 None
814 }
815 },
816 Language::Haskell
817 | Language::Elixir
818 | Language::Clojure
819 | Language::OCaml
820 | Language::FSharp
821 | Language::Lua
822 | Language::R => {
823 if kind == "function_call" || kind == "call" || kind == "application" {
824 node.children(&mut node.walk())
825 .find(|child| {
826 child.kind() == "identifier" || child.kind() == "variable"
827 })
828 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
829 .map(|s| s.to_owned())
830 } else {
831 None
832 }
833 },
834 };
835
836 if let Some(name) = call_name {
837 if !is_builtin(&name, language) {
838 calls.insert(name);
839 }
840 }
841
842 for child in node.children(&mut node.walk()) {
843 collect_calls_recursive(child, source_code, language, calls);
844 }
845}
846
847pub fn is_builtin(name: &str, language: Language) -> bool {
849 match language {
850 Language::Python => {
851 matches!(
852 name,
853 "print"
854 | "len"
855 | "range"
856 | "str"
857 | "int"
858 | "float"
859 | "list"
860 | "dict"
861 | "set"
862 | "tuple"
863 | "bool"
864 | "type"
865 | "isinstance"
866 | "hasattr"
867 | "getattr"
868 | "setattr"
869 | "super"
870 | "iter"
871 | "next"
872 | "open"
873 | "input"
874 | "format"
875 | "enumerate"
876 | "zip"
877 | "map"
878 | "filter"
879 | "sorted"
880 | "reversed"
881 | "sum"
882 | "min"
883 | "max"
884 | "abs"
885 | "round"
886 | "ord"
887 | "chr"
888 | "hex"
889 | "bin"
890 | "oct"
891 )
892 },
893 Language::JavaScript | Language::TypeScript => {
894 matches!(
895 name,
896 "console"
897 | "log"
898 | "error"
899 | "warn"
900 | "parseInt"
901 | "parseFloat"
902 | "setTimeout"
903 | "setInterval"
904 | "clearTimeout"
905 | "clearInterval"
906 | "JSON"
907 | "stringify"
908 | "parse"
909 | "toString"
910 | "valueOf"
911 | "push"
912 | "pop"
913 | "shift"
914 | "unshift"
915 | "slice"
916 | "splice"
917 | "map"
918 | "filter"
919 | "reduce"
920 | "forEach"
921 | "find"
922 | "findIndex"
923 | "includes"
924 | "indexOf"
925 | "join"
926 | "split"
927 | "replace"
928 )
929 },
930 Language::Rust => {
931 matches!(
932 name,
933 "println!"
934 | "print!"
935 | "eprintln!"
936 | "eprint!"
937 | "format!"
938 | "vec!"
939 | "panic!"
940 | "assert!"
941 | "assert_eq!"
942 | "assert_ne!"
943 | "debug!"
944 | "info!"
945 | "warn!"
946 | "error!"
947 | "trace!"
948 | "unwrap"
949 | "expect"
950 | "ok"
951 | "err"
952 | "some"
953 | "none"
954 | "clone"
955 | "to_string"
956 | "into"
957 | "from"
958 | "default"
959 | "iter"
960 | "into_iter"
961 | "collect"
962 | "map"
963 | "filter"
964 )
965 },
966 Language::Go => {
967 matches!(
968 name,
969 "fmt"
970 | "Println"
971 | "Printf"
972 | "Sprintf"
973 | "Errorf"
974 | "make"
975 | "new"
976 | "len"
977 | "cap"
978 | "append"
979 | "copy"
980 | "delete"
981 | "close"
982 | "panic"
983 | "recover"
984 | "print"
985 )
986 },
987 Language::Java => {
988 matches!(
989 name,
990 "println"
991 | "print"
992 | "printf"
993 | "toString"
994 | "equals"
995 | "hashCode"
996 | "getClass"
997 | "clone"
998 | "notify"
999 | "wait"
1000 | "get"
1001 | "set"
1002 | "add"
1003 | "remove"
1004 | "size"
1005 | "isEmpty"
1006 | "contains"
1007 | "iterator"
1008 | "valueOf"
1009 | "parseInt"
1010 )
1011 },
1012 Language::C | Language::Cpp => {
1013 matches!(
1014 name,
1015 "printf"
1016 | "scanf"
1017 | "malloc"
1018 | "free"
1019 | "memcpy"
1020 | "memset"
1021 | "strlen"
1022 | "strcpy"
1023 | "strcmp"
1024 | "strcat"
1025 | "sizeof"
1026 | "cout"
1027 | "cin"
1028 | "endl"
1029 | "cerr"
1030 | "clog"
1031 )
1032 },
1033 Language::CSharp => {
1034 matches!(
1035 name,
1036 "WriteLine"
1037 | "Write"
1038 | "ReadLine"
1039 | "ToString"
1040 | "Equals"
1041 | "GetHashCode"
1042 | "GetType"
1043 | "Add"
1044 | "Remove"
1045 | "Contains"
1046 | "Count"
1047 | "Clear"
1048 | "ToList"
1049 | "ToArray"
1050 )
1051 },
1052 Language::Ruby => {
1053 matches!(
1054 name,
1055 "puts"
1056 | "print"
1057 | "p"
1058 | "gets"
1059 | "each"
1060 | "map"
1061 | "select"
1062 | "reject"
1063 | "reduce"
1064 | "inject"
1065 | "find"
1066 | "any?"
1067 | "all?"
1068 | "include?"
1069 | "empty?"
1070 | "nil?"
1071 | "length"
1072 | "size"
1073 )
1074 },
1075 Language::Php => {
1076 matches!(
1077 name,
1078 "echo"
1079 | "print"
1080 | "var_dump"
1081 | "print_r"
1082 | "isset"
1083 | "empty"
1084 | "array"
1085 | "count"
1086 | "strlen"
1087 | "strpos"
1088 | "substr"
1089 | "explode"
1090 | "implode"
1091 | "json_encode"
1092 | "json_decode"
1093 )
1094 },
1095 Language::Kotlin => {
1096 matches!(
1097 name,
1098 "println"
1099 | "print"
1100 | "readLine"
1101 | "toString"
1102 | "equals"
1103 | "hashCode"
1104 | "map"
1105 | "filter"
1106 | "forEach"
1107 | "let"
1108 | "also"
1109 | "apply"
1110 | "run"
1111 | "with"
1112 | "listOf"
1113 | "mapOf"
1114 | "setOf"
1115 )
1116 },
1117 Language::Swift => {
1118 matches!(
1119 name,
1120 "print"
1121 | "debugPrint"
1122 | "dump"
1123 | "map"
1124 | "filter"
1125 | "reduce"
1126 | "forEach"
1127 | "contains"
1128 | "count"
1129 | "isEmpty"
1130 | "append"
1131 )
1132 },
1133 Language::Scala => {
1134 matches!(
1135 name,
1136 "println"
1137 | "print"
1138 | "map"
1139 | "filter"
1140 | "flatMap"
1141 | "foreach"
1142 | "reduce"
1143 | "fold"
1144 | "foldLeft"
1145 | "foldRight"
1146 | "collect"
1147 )
1148 },
1149 Language::Bash
1150 | Language::Haskell
1151 | Language::Elixir
1152 | Language::Clojure
1153 | Language::OCaml
1154 | Language::FSharp
1155 | Language::Lua
1156 | Language::R => false,
1157 }
1158}
1159
/// Strips block-comment decoration from a `/** ... */` (or `/* ... */`) comment and
/// returns its content as a single line.
///
/// Each line is trimmed, then loses a trailing `*/`, a leading `/**` or `/*`, and
/// any leading `*` gutter characters. Lines that end up empty are dropped and the
/// rest are joined with single spaces.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // Remove the terminator before the `*` gutter: otherwise a closing
                // line such as ` */` would lose its `*` first and leave a stray `/`.
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1175
1176pub fn clean_javadoc(text: &str) -> String {
1178 clean_jsdoc(text)
1179}
1180
1181pub fn extract_inheritance(
1183 node: Node<'_>,
1184 source_code: &str,
1185 language: Language,
1186) -> (Option<String>, Vec<String>) {
1187 let mut extends = None;
1188 let mut implements = Vec::new();
1189
1190 match language {
1191 Language::Python => {
1192 if node.kind() == "class_definition" {
1194 if let Some(args) = node.child_by_field_name("superclasses") {
1195 for child in args.children(&mut args.walk()) {
1196 if child.kind() == "identifier" || child.kind() == "attribute" {
1197 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1198 if extends.is_none() {
1199 extends = Some(name.to_owned());
1200 } else {
1201 implements.push(name.to_owned());
1202 }
1203 }
1204 }
1205 }
1206 }
1207 }
1208 },
1209 Language::JavaScript | Language::TypeScript => {
1210 if node.kind() == "class_declaration" || node.kind() == "class" {
1212 for child in node.children(&mut node.walk()) {
1213 if child.kind() == "class_heritage" {
1214 for heritage in child.children(&mut child.walk()) {
1215 if heritage.kind() == "extends_clause" {
1216 for type_node in heritage.children(&mut heritage.walk()) {
1217 if type_node.kind() == "identifier"
1218 || type_node.kind() == "type_identifier"
1219 {
1220 if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1221 {
1222 extends = Some(name.to_owned());
1223 }
1224 }
1225 }
1226 } else if heritage.kind() == "implements_clause" {
1227 for type_node in heritage.children(&mut heritage.walk()) {
1228 if type_node.kind() == "identifier"
1229 || type_node.kind() == "type_identifier"
1230 {
1231 if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1232 {
1233 implements.push(name.to_owned());
1234 }
1235 }
1236 }
1237 }
1238 }
1239 }
1240 }
1241 }
1242 },
1243 Language::Rust => {
1244 if node.kind() == "impl_item" {
1247 let mut has_for = false;
1248 for child in node.children(&mut node.walk()) {
1249 if child.kind() == "for" {
1250 has_for = true;
1251 }
1252 if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1253 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1254 if has_for {
1255 } else {
1257 implements.push(name.to_owned());
1259 }
1260 }
1261 }
1262 }
1263 }
1264 },
1265 Language::Go => {
1266 if node.kind() == "type_declaration" {
1268 for child in node.children(&mut node.walk()) {
1269 if child.kind() == "type_spec" {
1270 for spec_child in child.children(&mut child.walk()) {
1271 if spec_child.kind() == "struct_type" {
1272 for field in spec_child.children(&mut spec_child.walk()) {
1273 if field.kind() == "field_declaration" {
1274 let has_name = field.child_by_field_name("name").is_some();
1276 if !has_name {
1277 if let Some(type_node) = field.child_by_field_name("type")
1278 {
1279 if let Ok(name) =
1280 type_node.utf8_text(source_code.as_bytes())
1281 {
1282 implements.push(name.to_owned());
1283 }
1284 }
1285 }
1286 }
1287 }
1288 }
1289 }
1290 }
1291 }
1292 }
1293 },
1294 Language::Java => {
1295 if node.kind() == "class_declaration" {
1297 for child in node.children(&mut node.walk()) {
1298 if child.kind() == "superclass" {
1299 for type_node in child.children(&mut child.walk()) {
1300 if type_node.kind() == "type_identifier" {
1301 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1302 extends = Some(name.to_owned());
1303 }
1304 }
1305 }
1306 } else if child.kind() == "super_interfaces" {
1307 for type_list in child.children(&mut child.walk()) {
1308 if type_list.kind() == "type_list" {
1309 for type_node in type_list.children(&mut type_list.walk()) {
1310 if type_node.kind() == "type_identifier" {
1311 if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1312 {
1313 implements.push(name.to_owned());
1314 }
1315 }
1316 }
1317 }
1318 }
1319 }
1320 }
1321 }
1322 },
1323 Language::C | Language::Cpp => {
1324 if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1326 for child in node.children(&mut node.walk()) {
1327 if child.kind() == "base_class_clause" {
1328 for base in child.children(&mut child.walk()) {
1329 if base.kind() == "type_identifier" {
1330 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1331 if extends.is_none() {
1332 extends = Some(name.to_owned());
1333 } else {
1334 implements.push(name.to_owned());
1335 }
1336 }
1337 }
1338 }
1339 }
1340 }
1341 }
1342 },
1343 Language::CSharp => {
1344 if node.kind() == "class_declaration" {
1346 for child in node.children(&mut node.walk()) {
1347 if child.kind() == "base_list" {
1348 for base in child.children(&mut child.walk()) {
1349 if base.kind() == "identifier" || base.kind() == "generic_name" {
1350 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1351 if name.starts_with('I') && name.len() > 1 {
1352 implements.push(name.to_owned());
1354 } else if extends.is_none() {
1355 extends = Some(name.to_owned());
1356 } else {
1357 implements.push(name.to_owned());
1358 }
1359 }
1360 }
1361 }
1362 }
1363 }
1364 }
1365 },
1366 Language::Ruby => {
1367 if node.kind() == "class" {
1369 for child in node.children(&mut node.walk()) {
1370 if child.kind() == "superclass" {
1371 for type_node in child.children(&mut child.walk()) {
1372 if type_node.kind() == "constant" {
1373 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1374 extends = Some(name.to_owned());
1375 }
1376 }
1377 }
1378 }
1379 }
1380 }
1381 },
1382 Language::Php => {
1383 if node.kind() == "class_declaration" {
1385 for child in node.children(&mut node.walk()) {
1386 if child.kind() == "base_clause" {
1387 for type_node in child.children(&mut child.walk()) {
1388 if type_node.kind() == "name" {
1389 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1390 extends = Some(name.to_owned());
1391 }
1392 }
1393 }
1394 } else if child.kind() == "class_interface_clause" {
1395 for type_node in child.children(&mut child.walk()) {
1396 if type_node.kind() == "name" {
1397 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1398 implements.push(name.to_owned());
1399 }
1400 }
1401 }
1402 }
1403 }
1404 }
1405 },
1406 Language::Kotlin => {
1407 if node.kind() == "class_declaration" {
1409 for child in node.children(&mut node.walk()) {
1410 if child.kind() == "delegation_specifiers" {
1411 for spec in child.children(&mut child.walk()) {
1412 if spec.kind() == "delegation_specifier" {
1413 for type_node in spec.children(&mut spec.walk()) {
1414 if type_node.kind() == "user_type" {
1415 if let Ok(name) = type_node.utf8_text(source_code.as_bytes())
1416 {
1417 if extends.is_none() {
1418 extends = Some(name.to_owned());
1419 } else {
1420 implements.push(name.to_owned());
1421 }
1422 }
1423 }
1424 }
1425 }
1426 }
1427 }
1428 }
1429 }
1430 },
1431 Language::Swift => {
1432 if node.kind() == "class_declaration" {
1434 for child in node.children(&mut node.walk()) {
1435 if child.kind() == "type_inheritance_clause" {
1436 for type_node in child.children(&mut child.walk()) {
1437 if type_node.kind() == "type_identifier" {
1438 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1439 if extends.is_none() {
1440 extends = Some(name.to_owned());
1441 } else {
1442 implements.push(name.to_owned());
1443 }
1444 }
1445 }
1446 }
1447 }
1448 }
1449 }
1450 },
1451 Language::Scala => {
1452 if node.kind() == "class_definition" {
1454 for child in node.children(&mut node.walk()) {
1455 if child.kind() == "extends_clause" {
1456 for type_node in child.children(&mut child.walk()) {
1457 if type_node.kind() == "type_identifier" {
1458 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1459 if extends.is_none() {
1460 extends = Some(name.to_owned());
1461 } else {
1462 implements.push(name.to_owned());
1463 }
1464 }
1465 }
1466 }
1467 }
1468 }
1469 }
1470 },
1471 Language::Bash
1472 | Language::Haskell
1473 | Language::Elixir
1474 | Language::Clojure
1475 | Language::OCaml
1476 | Language::FSharp
1477 | Language::Lua
1478 | Language::R => {},
1479 }
1480
1481 (extends, implements)
1482}
1483
1484pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1486 match capture_name {
1487 "function" => SymbolKind::Function,
1488 "class" => SymbolKind::Class,
1489 "method" => SymbolKind::Method,
1490 "struct" => SymbolKind::Struct,
1491 "enum" => SymbolKind::Enum,
1492 "interface" => SymbolKind::Interface,
1493 "trait" => SymbolKind::Trait,
1494 _ => SymbolKind::Function,
1495 }
1496}