infiniloom_engine/parser/
extraction.rs1use super::language::Language;
11use crate::types::{SymbolKind, Visibility};
12use std::collections::HashSet;
13use tree_sitter::Node;
14
15pub fn extract_signature(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
17 let sig_node = match language {
18 Language::Python => {
19 if node.kind() == "function_definition" {
20 let start = node.start_byte();
21 let mut end = start;
22 for byte in &source_code.as_bytes()[start..] {
23 end += 1;
24 if *byte == b':' || *byte == b'\n' {
25 break;
26 }
27 }
28 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
29 }
30 None
31 },
32 Language::JavaScript | Language::TypeScript => {
33 if node.kind().contains("function") || node.kind().contains("method") {
34 let start = node.start_byte();
35 let mut end = start;
36 let mut brace_count = 0;
37 for byte in &source_code.as_bytes()[start..] {
38 if *byte == b'{' {
39 brace_count += 1;
40 if brace_count == 1 {
41 break;
42 }
43 }
44 end += 1;
45 }
46 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
47 }
48 None
49 },
50 Language::Rust => {
51 if node.kind() == "function_item" {
52 for child in node.children(&mut node.walk()) {
53 if child.kind() == "block" {
54 let start = node.start_byte();
55 let end = child.start_byte();
56 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
57 }
58 }
59 }
60 None
61 },
62 Language::Go => {
63 if node.kind() == "function_declaration" || node.kind() == "method_declaration" {
64 for child in node.children(&mut node.walk()) {
65 if child.kind() == "block" {
66 let start = node.start_byte();
67 let end = child.start_byte();
68 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
69 }
70 }
71 }
72 None
73 },
74 Language::Java => {
75 if node.kind() == "method_declaration" {
76 for child in node.children(&mut node.walk()) {
77 if child.kind() == "block" {
78 let start = node.start_byte();
79 let end = child.start_byte();
80 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
81 }
82 }
83 }
84 None
85 },
86 Language::C
87 | Language::Cpp
88 | Language::CSharp
89 | Language::Php
90 | Language::Kotlin
91 | Language::Swift
92 | Language::Scala => {
93 for child in node.children(&mut node.walk()) {
94 if child.kind() == "block"
95 || child.kind() == "compound_statement"
96 || child.kind() == "function_body"
97 {
98 let start = node.start_byte();
99 let end = child.start_byte();
100 return Some(source_code[start..end].trim().to_owned().replace('\n', " "));
101 }
102 }
103 None
104 },
105 Language::Ruby | Language::Lua => {
106 let start = node.start_byte();
107 let mut end = start;
108 for byte in &source_code.as_bytes()[start..] {
109 end += 1;
110 if *byte == b'\n' {
111 break;
112 }
113 }
114 Some(source_code[start..end].trim().to_owned())
115 },
116 Language::Bash => {
117 let start = node.start_byte();
118 let mut end = start;
119 for byte in &source_code.as_bytes()[start..] {
120 if *byte == b'{' {
121 break;
122 }
123 end += 1;
124 }
125 Some(source_code[start..end].trim().to_owned())
126 },
127 Language::Haskell
128 | Language::OCaml
129 | Language::FSharp
130 | Language::Elixir
131 | Language::Clojure
132 | Language::R => {
133 let start = node.start_byte();
134 let mut end = start;
135 for byte in &source_code.as_bytes()[start..] {
136 end += 1;
137 if *byte == b'\n' || *byte == b'=' {
138 break;
139 }
140 }
141 Some(source_code[start..end].trim().to_owned())
142 },
143 };
144
145 sig_node.or_else(|| {
146 let start = node.start_byte();
147 let end = std::cmp::min(start + 200, source_code.len());
148 let text = &source_code[start..end];
149 text.lines().next().map(|s| s.trim().to_owned())
150 })
151}
152
153pub fn extract_docstring(node: Node<'_>, source_code: &str, language: Language) -> Option<String> {
155 match language {
156 Language::Python => {
157 let mut cursor = node.walk();
158 for child in node.children(&mut cursor) {
159 if child.kind() == "block" {
160 for stmt in child.children(&mut child.walk()) {
161 if stmt.kind() == "expression_statement" {
162 for expr in stmt.children(&mut stmt.walk()) {
163 if expr.kind() == "string" {
164 if let Ok(text) = expr.utf8_text(source_code.as_bytes()) {
165 return Some(
166 text.trim_matches(|c| c == '"' || c == '\'')
167 .trim()
168 .to_owned(),
169 );
170 }
171 }
172 }
173 }
174 }
175 }
176 }
177 None
178 },
179 Language::JavaScript | Language::TypeScript => {
180 if let Some(prev_sibling) = node.prev_sibling() {
181 if prev_sibling.kind() == "comment" {
182 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
183 if text.starts_with("/**") {
184 return Some(clean_jsdoc(text));
185 }
186 }
187 }
188 }
189 None
190 },
191 Language::Rust => {
192 let start_byte = node.start_byte();
193 let lines_before: Vec<_> = source_code[..start_byte]
194 .lines()
195 .rev()
196 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
197 .collect();
198
199 if !lines_before.is_empty() {
200 let doc: Vec<String> = lines_before
201 .into_iter()
202 .rev()
203 .filter_map(|line| {
204 let trimmed = line.trim();
205 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
206 })
207 .collect();
208
209 if !doc.is_empty() {
210 return Some(doc.join(" "));
211 }
212 }
213 None
214 },
215 Language::Go => {
216 if let Some(prev_sibling) = node.prev_sibling() {
217 if prev_sibling.kind() == "comment" {
218 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
219 return Some(text.trim_start_matches("//").trim().to_owned());
220 }
221 }
222 }
223 None
224 },
225 Language::Java => {
226 if let Some(prev_sibling) = node.prev_sibling() {
227 if prev_sibling.kind() == "block_comment" {
228 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
229 if text.starts_with("/**") {
230 return Some(clean_javadoc(text));
231 }
232 }
233 }
234 }
235 None
236 },
237 Language::C | Language::Cpp => {
238 if let Some(prev_sibling) = node.prev_sibling() {
239 if prev_sibling.kind() == "comment" {
240 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
241 if text.starts_with("/**") || text.starts_with("/*") {
242 return Some(clean_jsdoc(text));
243 }
244 return Some(text.trim_start_matches("//").trim().to_owned());
245 }
246 }
247 }
248 None
249 },
250 Language::CSharp => {
251 let start_byte = node.start_byte();
252 let lines_before: Vec<_> = source_code[..start_byte]
253 .lines()
254 .rev()
255 .take_while(|line| line.trim().starts_with("///") || line.trim().is_empty())
256 .collect();
257
258 if !lines_before.is_empty() {
259 let doc: Vec<String> = lines_before
260 .into_iter()
261 .rev()
262 .filter_map(|line| {
263 let trimmed = line.trim();
264 trimmed.strip_prefix("///").map(|s| s.trim().to_owned())
265 })
266 .collect();
267
268 if !doc.is_empty() {
269 return Some(doc.join(" "));
270 }
271 }
272 None
273 },
274 Language::Ruby => {
275 if let Some(prev_sibling) = node.prev_sibling() {
276 if prev_sibling.kind() == "comment" {
277 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
278 return Some(text.trim_start_matches('#').trim().to_owned());
279 }
280 }
281 }
282 None
283 },
284 Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
285 if let Some(prev_sibling) = node.prev_sibling() {
286 let kind = prev_sibling.kind();
287 if kind == "comment" || kind == "multiline_comment" || kind == "block_comment" {
288 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
289 if text.starts_with("/**") {
290 return Some(clean_jsdoc(text));
291 }
292 }
293 }
294 }
295 None
296 },
297 Language::Bash => {
298 if let Some(prev_sibling) = node.prev_sibling() {
299 if prev_sibling.kind() == "comment" {
300 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
301 return Some(text.trim_start_matches('#').trim().to_owned());
302 }
303 }
304 }
305 None
306 },
307 Language::Haskell => {
308 if let Some(prev_sibling) = node.prev_sibling() {
309 if prev_sibling.kind() == "comment" {
310 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
311 let cleaned = text
312 .trim_start_matches("{-")
313 .trim_end_matches("-}")
314 .trim_start_matches("--")
315 .trim();
316 return Some(cleaned.to_owned());
317 }
318 }
319 }
320 None
321 },
322 Language::Elixir => {
323 if let Some(prev_sibling) = node.prev_sibling() {
324 if prev_sibling.kind() == "comment" {
325 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
326 return Some(text.trim_start_matches('#').trim().to_owned());
327 }
328 }
329 }
330 None
331 },
332 Language::Clojure => None,
333 Language::OCaml | Language::FSharp => {
334 if let Some(prev_sibling) = node.prev_sibling() {
335 if prev_sibling.kind() == "comment" {
336 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
337 let cleaned = text
338 .trim_start_matches("(**")
339 .trim_start_matches("(*")
340 .trim_end_matches("*)")
341 .trim();
342 return Some(cleaned.to_owned());
343 }
344 }
345 }
346 None
347 },
348 Language::Lua => {
349 if let Some(prev_sibling) = node.prev_sibling() {
350 if prev_sibling.kind() == "comment" {
351 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
352 let cleaned = text
353 .trim_start_matches("--[[")
354 .trim_end_matches("]]")
355 .trim_start_matches("--")
356 .trim();
357 return Some(cleaned.to_owned());
358 }
359 }
360 }
361 None
362 },
363 Language::R => {
364 if let Some(prev_sibling) = node.prev_sibling() {
365 if prev_sibling.kind() == "comment" {
366 if let Ok(text) = prev_sibling.utf8_text(source_code.as_bytes()) {
367 return Some(text.trim_start_matches('#').trim().to_owned());
368 }
369 }
370 }
371 None
372 },
373 }
374}
375
376pub fn extract_parent(node: Node<'_>, source_code: &str) -> Option<String> {
378 let mut current = node.parent()?;
379
380 while let Some(parent) = current.parent() {
381 if ["class_definition", "class_declaration", "struct_item", "impl_item"]
382 .contains(&parent.kind())
383 {
384 for child in parent.children(&mut parent.walk()) {
385 if child.kind() == "identifier" || child.kind() == "type_identifier" {
386 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
387 return Some(name.to_owned());
388 }
389 }
390 }
391 }
392 current = parent;
393 }
394
395 None
396}
397
398pub fn extract_visibility(node: Node<'_>, source_code: &str, language: Language) -> Visibility {
400 match language {
401 Language::Python => {
402 if let Some(name_node) = node.child_by_field_name("name") {
403 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
404 if name.starts_with("__") && !name.ends_with("__") {
405 return Visibility::Private;
406 } else if name.starts_with('_') {
407 return Visibility::Protected;
408 }
409 }
410 }
411 Visibility::Public
412 },
413 Language::Rust => {
414 for child in node.children(&mut node.walk()) {
415 if child.kind() == "visibility_modifier" {
416 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
417 if text.contains("pub(crate)") || text.contains("pub(super)") {
418 return Visibility::Internal;
419 } else if text.starts_with("pub") {
420 return Visibility::Public;
421 }
422 }
423 }
424 }
425 Visibility::Private
426 },
427 Language::JavaScript | Language::TypeScript => {
428 for child in node.children(&mut node.walk()) {
429 let kind = child.kind();
430 if kind == "private" || kind == "accessibility_modifier" {
431 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
432 return match text {
433 "private" => Visibility::Private,
434 "protected" => Visibility::Protected,
435 _ => Visibility::Public,
436 };
437 }
438 }
439 }
440 if let Some(name_node) = node.child_by_field_name("name") {
441 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
442 if name.starts_with('#') {
443 return Visibility::Private;
444 }
445 }
446 }
447 Visibility::Public
448 },
449 Language::Go => {
450 if let Some(name_node) = node.child_by_field_name("name") {
451 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
452 if let Some(first_char) = name.chars().next() {
453 if first_char.is_lowercase() {
454 return Visibility::Private;
455 }
456 }
457 }
458 }
459 Visibility::Public
460 },
461 Language::Java => {
462 for child in node.children(&mut node.walk()) {
463 if child.kind() == "modifiers" {
464 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
465 if text.contains("private") {
466 return Visibility::Private;
467 } else if text.contains("protected") {
468 return Visibility::Protected;
469 } else if text.contains("public") {
470 return Visibility::Public;
471 }
472 }
473 }
474 }
475 Visibility::Internal
476 },
477 Language::C | Language::Cpp => {
478 for child in node.children(&mut node.walk()) {
479 if child.kind() == "storage_class_specifier" {
480 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
481 if text == "static" {
482 return Visibility::Private;
483 }
484 }
485 }
486 }
487 Visibility::Public
488 },
489 Language::CSharp | Language::Kotlin | Language::Swift | Language::Scala => {
490 for child in node.children(&mut node.walk()) {
491 let kind = child.kind();
492 if kind == "modifier" || kind == "modifiers" || kind == "visibility_modifier" {
493 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
494 if text.contains("private") {
495 return Visibility::Private;
496 } else if text.contains("protected") {
497 return Visibility::Protected;
498 } else if text.contains("internal") {
499 return Visibility::Internal;
500 } else if text.contains("public") {
501 return Visibility::Public;
502 }
503 }
504 }
505 }
506 Visibility::Internal
507 },
508 Language::Ruby => {
509 if let Some(name_node) = node.child_by_field_name("name") {
510 if let Ok(name) = name_node.utf8_text(source_code.as_bytes()) {
511 if name.starts_with('_') {
512 return Visibility::Private;
513 }
514 }
515 }
516 Visibility::Public
517 },
518 Language::Php => {
519 for child in node.children(&mut node.walk()) {
520 if child.kind() == "visibility_modifier" {
521 if let Ok(text) = child.utf8_text(source_code.as_bytes()) {
522 return match text {
523 "private" => Visibility::Private,
524 "protected" => Visibility::Protected,
525 "public" => Visibility::Public,
526 _ => Visibility::Public,
527 };
528 }
529 }
530 }
531 Visibility::Public
532 },
533 Language::Bash => Visibility::Public,
534 Language::Haskell
535 | Language::Elixir
536 | Language::Clojure
537 | Language::OCaml
538 | Language::FSharp
539 | Language::Lua
540 | Language::R => Visibility::Public,
541 }
542}
543
544pub fn extract_calls(node: Node<'_>, source_code: &str, language: Language) -> Vec<String> {
546 let mut calls = HashSet::new();
547
548 let body_node = find_body_node(node, language);
549 if let Some(body) = body_node {
550 collect_calls_recursive(body, source_code, language, &mut calls);
551 }
552
553 if calls.is_empty() {
554 collect_calls_recursive(node, source_code, language, &mut calls);
555 }
556
557 calls.into_iter().collect()
558}
559
560pub fn find_body_node(node: Node<'_>, language: Language) -> Option<Node<'_>> {
562 match language {
563 Language::Python => {
564 for child in node.children(&mut node.walk()) {
565 if child.kind() == "block" {
566 return Some(child);
567 }
568 }
569 },
570 Language::Rust => {
571 for child in node.children(&mut node.walk()) {
572 if child.kind() == "block" {
573 return Some(child);
574 }
575 }
576 },
577 Language::JavaScript | Language::TypeScript => {
578 for child in node.children(&mut node.walk()) {
579 let kind = child.kind();
580 if kind == "statement_block" {
581 return Some(child);
582 }
583 if kind == "arrow_function" {
584 if let Some(body) = find_body_node(child, language) {
585 return Some(body);
586 }
587 return Some(child);
588 }
589 }
590 if node.kind() == "arrow_function" {
591 for child in node.children(&mut node.walk()) {
592 let kind = child.kind();
593 if kind != "formal_parameters"
594 && kind != "identifier"
595 && kind != "=>"
596 && kind != "("
597 && kind != ")"
598 && kind != ","
599 {
600 return Some(child);
601 }
602 }
603 return Some(node);
604 }
605 },
606 Language::Go => {
607 for child in node.children(&mut node.walk()) {
608 if child.kind() == "block" {
609 return Some(child);
610 }
611 }
612 },
613 Language::Java => {
614 for child in node.children(&mut node.walk()) {
615 if child.kind() == "block" {
616 return Some(child);
617 }
618 }
619 },
620 Language::C | Language::Cpp => {
621 for child in node.children(&mut node.walk()) {
622 if child.kind() == "compound_statement" {
623 return Some(child);
624 }
625 }
626 },
627 Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
628 for child in node.children(&mut node.walk()) {
629 let kind = child.kind();
630 if kind == "block" || kind == "compound_statement" || kind == "function_body" {
631 return Some(child);
632 }
633 }
634 },
635 Language::Ruby => {
636 for child in node.children(&mut node.walk()) {
637 if child.kind() == "body_statement" || child.kind() == "do_block" {
638 return Some(child);
639 }
640 }
641 },
642 Language::Bash => {
643 for child in node.children(&mut node.walk()) {
644 if child.kind() == "compound_statement" {
645 return Some(child);
646 }
647 }
648 },
649 Language::Haskell
650 | Language::Elixir
651 | Language::Clojure
652 | Language::OCaml
653 | Language::FSharp
654 | Language::R => {
655 return Some(node);
656 },
657 Language::Lua => {
658 for child in node.children(&mut node.walk()) {
659 if child.kind() == "block" {
660 return Some(child);
661 }
662 }
663 },
664 }
665 None
666}
667
668pub fn collect_calls_recursive(
670 node: Node<'_>,
671 source_code: &str,
672 language: Language,
673 calls: &mut HashSet<String>,
674) {
675 let kind = node.kind();
676
677 let call_name = match language {
678 Language::Python => {
679 if kind == "call" {
680 node.child_by_field_name("function").and_then(|f| {
681 if f.kind() == "identifier" {
682 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
683 } else if f.kind() == "attribute" {
684 f.child_by_field_name("attribute")
685 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
686 .map(String::from)
687 } else {
688 None
689 }
690 })
691 } else {
692 None
693 }
694 },
695 Language::Rust => {
696 if kind == "call_expression" {
697 node.child_by_field_name("function").and_then(|f| {
698 if f.kind() == "identifier" {
699 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
700 } else if f.kind() == "field_expression" {
701 f.child_by_field_name("field")
702 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
703 .map(String::from)
704 } else if f.kind() == "scoped_identifier" {
705 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
706 } else {
707 None
708 }
709 })
710 } else if kind == "macro_invocation" {
711 node.child_by_field_name("macro")
712 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
713 .map(|s| format!("{}!", s))
714 } else {
715 None
716 }
717 },
718 Language::JavaScript | Language::TypeScript => {
719 if kind == "call_expression" {
720 node.child_by_field_name("function").and_then(|f| {
721 if f.kind() == "identifier" {
722 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
723 } else if f.kind() == "member_expression" {
724 f.child_by_field_name("property")
725 .and_then(|p| p.utf8_text(source_code.as_bytes()).ok())
726 .map(String::from)
727 } else {
728 None
729 }
730 })
731 } else {
732 None
733 }
734 },
735 Language::Go => {
736 if kind == "call_expression" {
737 node.child_by_field_name("function").and_then(|f| {
738 if f.kind() == "identifier" {
739 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
740 } else if f.kind() == "selector_expression" {
741 f.child_by_field_name("field")
742 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
743 .map(String::from)
744 } else {
745 None
746 }
747 })
748 } else {
749 None
750 }
751 },
752 Language::Java => {
753 if kind == "method_invocation" {
754 node.child_by_field_name("name")
755 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
756 .map(String::from)
757 } else {
758 None
759 }
760 },
761 Language::C | Language::Cpp => {
762 if kind == "call_expression" {
763 node.child_by_field_name("function").and_then(|f| {
764 if f.kind() == "identifier" {
765 f.utf8_text(source_code.as_bytes()).ok().map(String::from)
766 } else if f.kind() == "field_expression" {
767 f.child_by_field_name("field")
768 .and_then(|a| a.utf8_text(source_code.as_bytes()).ok())
769 .map(String::from)
770 } else {
771 None
772 }
773 })
774 } else {
775 None
776 }
777 },
778 Language::CSharp | Language::Php | Language::Kotlin | Language::Swift | Language::Scala => {
779 if kind == "invocation_expression" || kind == "call_expression" {
780 node.children(&mut node.walk())
781 .find(|child| child.kind() == "identifier" || child.kind() == "simple_name")
782 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
783 .map(|s| s.to_owned())
784 } else {
785 None
786 }
787 },
788 Language::Ruby => {
789 if kind == "call" || kind == "method_call" {
790 node.child_by_field_name("method")
791 .and_then(|m| m.utf8_text(source_code.as_bytes()).ok())
792 .map(String::from)
793 } else {
794 None
795 }
796 },
797 Language::Bash => {
798 if kind == "command" {
799 node.child_by_field_name("name")
800 .and_then(|n| n.utf8_text(source_code.as_bytes()).ok())
801 .map(String::from)
802 } else {
803 None
804 }
805 },
806 Language::Haskell
807 | Language::Elixir
808 | Language::Clojure
809 | Language::OCaml
810 | Language::FSharp
811 | Language::Lua
812 | Language::R => {
813 if kind == "function_call" || kind == "call" || kind == "application" {
814 node.children(&mut node.walk())
815 .find(|child| child.kind() == "identifier" || child.kind() == "variable")
816 .and_then(|child| child.utf8_text(source_code.as_bytes()).ok())
817 .map(|s| s.to_owned())
818 } else {
819 None
820 }
821 },
822 };
823
824 if let Some(name) = call_name {
825 if !is_builtin(&name, language) {
826 calls.insert(name);
827 }
828 }
829
830 for child in node.children(&mut node.walk()) {
831 collect_calls_recursive(child, source_code, language, calls);
832 }
833}
834
835pub fn is_builtin(name: &str, language: Language) -> bool {
837 match language {
838 Language::Python => {
839 matches!(
840 name,
841 "print"
842 | "len"
843 | "range"
844 | "str"
845 | "int"
846 | "float"
847 | "list"
848 | "dict"
849 | "set"
850 | "tuple"
851 | "bool"
852 | "type"
853 | "isinstance"
854 | "hasattr"
855 | "getattr"
856 | "setattr"
857 | "super"
858 | "iter"
859 | "next"
860 | "open"
861 | "input"
862 | "format"
863 | "enumerate"
864 | "zip"
865 | "map"
866 | "filter"
867 | "sorted"
868 | "reversed"
869 | "sum"
870 | "min"
871 | "max"
872 | "abs"
873 | "round"
874 | "ord"
875 | "chr"
876 | "hex"
877 | "bin"
878 | "oct"
879 )
880 },
881 Language::JavaScript | Language::TypeScript => {
882 matches!(
883 name,
884 "console"
885 | "log"
886 | "error"
887 | "warn"
888 | "parseInt"
889 | "parseFloat"
890 | "setTimeout"
891 | "setInterval"
892 | "clearTimeout"
893 | "clearInterval"
894 | "JSON"
895 | "stringify"
896 | "parse"
897 | "toString"
898 | "valueOf"
899 | "push"
900 | "pop"
901 | "shift"
902 | "unshift"
903 | "slice"
904 | "splice"
905 | "map"
906 | "filter"
907 | "reduce"
908 | "forEach"
909 | "find"
910 | "findIndex"
911 | "includes"
912 | "indexOf"
913 | "join"
914 | "split"
915 | "replace"
916 )
917 },
918 Language::Rust => {
919 matches!(
920 name,
921 "println!"
922 | "print!"
923 | "eprintln!"
924 | "eprint!"
925 | "format!"
926 | "vec!"
927 | "panic!"
928 | "assert!"
929 | "assert_eq!"
930 | "assert_ne!"
931 | "debug!"
932 | "info!"
933 | "warn!"
934 | "error!"
935 | "trace!"
936 | "unwrap"
937 | "expect"
938 | "ok"
939 | "err"
940 | "some"
941 | "none"
942 | "clone"
943 | "to_string"
944 | "into"
945 | "from"
946 | "default"
947 | "iter"
948 | "into_iter"
949 | "collect"
950 | "map"
951 | "filter"
952 )
953 },
954 Language::Go => {
955 matches!(
956 name,
957 "fmt"
958 | "Println"
959 | "Printf"
960 | "Sprintf"
961 | "Errorf"
962 | "make"
963 | "new"
964 | "len"
965 | "cap"
966 | "append"
967 | "copy"
968 | "delete"
969 | "close"
970 | "panic"
971 | "recover"
972 | "print"
973 )
974 },
975 Language::Java => {
976 matches!(
977 name,
978 "println"
979 | "print"
980 | "printf"
981 | "toString"
982 | "equals"
983 | "hashCode"
984 | "getClass"
985 | "clone"
986 | "notify"
987 | "wait"
988 | "get"
989 | "set"
990 | "add"
991 | "remove"
992 | "size"
993 | "isEmpty"
994 | "contains"
995 | "iterator"
996 | "valueOf"
997 | "parseInt"
998 )
999 },
1000 Language::C | Language::Cpp => {
1001 matches!(
1002 name,
1003 "printf"
1004 | "scanf"
1005 | "malloc"
1006 | "free"
1007 | "memcpy"
1008 | "memset"
1009 | "strlen"
1010 | "strcpy"
1011 | "strcmp"
1012 | "strcat"
1013 | "sizeof"
1014 | "cout"
1015 | "cin"
1016 | "endl"
1017 | "cerr"
1018 | "clog"
1019 )
1020 },
1021 Language::CSharp => {
1022 matches!(
1023 name,
1024 "WriteLine"
1025 | "Write"
1026 | "ReadLine"
1027 | "ToString"
1028 | "Equals"
1029 | "GetHashCode"
1030 | "GetType"
1031 | "Add"
1032 | "Remove"
1033 | "Contains"
1034 | "Count"
1035 | "Clear"
1036 | "ToList"
1037 | "ToArray"
1038 )
1039 },
1040 Language::Ruby => {
1041 matches!(
1042 name,
1043 "puts"
1044 | "print"
1045 | "p"
1046 | "gets"
1047 | "each"
1048 | "map"
1049 | "select"
1050 | "reject"
1051 | "reduce"
1052 | "inject"
1053 | "find"
1054 | "any?"
1055 | "all?"
1056 | "include?"
1057 | "empty?"
1058 | "nil?"
1059 | "length"
1060 | "size"
1061 )
1062 },
1063 Language::Php => {
1064 matches!(
1065 name,
1066 "echo"
1067 | "print"
1068 | "var_dump"
1069 | "print_r"
1070 | "isset"
1071 | "empty"
1072 | "array"
1073 | "count"
1074 | "strlen"
1075 | "strpos"
1076 | "substr"
1077 | "explode"
1078 | "implode"
1079 | "json_encode"
1080 | "json_decode"
1081 )
1082 },
1083 Language::Kotlin => {
1084 matches!(
1085 name,
1086 "println"
1087 | "print"
1088 | "readLine"
1089 | "toString"
1090 | "equals"
1091 | "hashCode"
1092 | "map"
1093 | "filter"
1094 | "forEach"
1095 | "let"
1096 | "also"
1097 | "apply"
1098 | "run"
1099 | "with"
1100 | "listOf"
1101 | "mapOf"
1102 | "setOf"
1103 )
1104 },
1105 Language::Swift => {
1106 matches!(
1107 name,
1108 "print"
1109 | "debugPrint"
1110 | "dump"
1111 | "map"
1112 | "filter"
1113 | "reduce"
1114 | "forEach"
1115 | "contains"
1116 | "count"
1117 | "isEmpty"
1118 | "append"
1119 )
1120 },
1121 Language::Scala => {
1122 matches!(
1123 name,
1124 "println"
1125 | "print"
1126 | "map"
1127 | "filter"
1128 | "flatMap"
1129 | "foreach"
1130 | "reduce"
1131 | "fold"
1132 | "foldLeft"
1133 | "foldRight"
1134 | "collect"
1135 )
1136 },
1137 Language::Bash
1138 | Language::Haskell
1139 | Language::Elixir
1140 | Language::Clojure
1141 | Language::OCaml
1142 | Language::FSharp
1143 | Language::Lua
1144 | Language::R => false,
1145 }
1146}
1147
/// Strips block-comment decoration from a `/** … */` (or `/* … */`) comment
/// and returns its text on a single line.
///
/// Every line is trimmed, loses its closing `*/`, its opening `/**` or `/*`
/// and any leading `*` gutter characters; lines that end up empty are dropped
/// and the rest are joined with single spaces.
pub fn clean_jsdoc(text: &str) -> String {
    text.lines()
        .map(|line| {
            line.trim()
                // The terminator must go first: stripping the leading `*` of a
                // lone `*/` line beforehand would leave a stray `/` behind.
                .trim_end_matches("*/")
                .trim_start_matches("/**")
                .trim_start_matches("/*")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
1163
1164pub fn clean_javadoc(text: &str) -> String {
1166 clean_jsdoc(text)
1167}
1168
1169pub fn extract_inheritance(
1171 node: Node<'_>,
1172 source_code: &str,
1173 language: Language,
1174) -> (Option<String>, Vec<String>) {
1175 let mut extends = None;
1176 let mut implements = Vec::new();
1177
1178 match language {
1179 Language::Python => {
1180 if node.kind() == "class_definition" {
1182 if let Some(args) = node.child_by_field_name("superclasses") {
1183 for child in args.children(&mut args.walk()) {
1184 if child.kind() == "identifier" || child.kind() == "attribute" {
1185 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1186 if extends.is_none() {
1187 extends = Some(name.to_owned());
1188 } else {
1189 implements.push(name.to_owned());
1190 }
1191 }
1192 }
1193 }
1194 }
1195 }
1196 },
1197 Language::JavaScript | Language::TypeScript => {
1198 if node.kind() == "class_declaration" || node.kind() == "class" {
1200 for child in node.children(&mut node.walk()) {
1201 if child.kind() == "class_heritage" {
1202 for heritage in child.children(&mut child.walk()) {
1203 if heritage.kind() == "extends_clause" {
1204 for type_node in heritage.children(&mut heritage.walk()) {
1205 if type_node.kind() == "identifier"
1206 || type_node.kind() == "type_identifier"
1207 {
1208 if let Ok(name) =
1209 type_node.utf8_text(source_code.as_bytes())
1210 {
1211 extends = Some(name.to_owned());
1212 }
1213 }
1214 }
1215 } else if heritage.kind() == "implements_clause" {
1216 for type_node in heritage.children(&mut heritage.walk()) {
1217 if type_node.kind() == "identifier"
1218 || type_node.kind() == "type_identifier"
1219 {
1220 if let Ok(name) =
1221 type_node.utf8_text(source_code.as_bytes())
1222 {
1223 implements.push(name.to_owned());
1224 }
1225 }
1226 }
1227 }
1228 }
1229 }
1230 }
1231 }
1232 },
1233 Language::Rust => {
1234 if node.kind() == "impl_item" {
1237 let mut has_for = false;
1238 for child in node.children(&mut node.walk()) {
1239 if child.kind() == "for" {
1240 has_for = true;
1241 }
1242 if child.kind() == "type_identifier" || child.kind() == "generic_type" {
1243 if let Ok(name) = child.utf8_text(source_code.as_bytes()) {
1244 if has_for {
1245 } else {
1247 implements.push(name.to_owned());
1249 }
1250 }
1251 }
1252 }
1253 }
1254 },
1255 Language::Go => {
1256 if node.kind() == "type_declaration" {
1258 for child in node.children(&mut node.walk()) {
1259 if child.kind() == "type_spec" {
1260 for spec_child in child.children(&mut child.walk()) {
1261 if spec_child.kind() == "struct_type" {
1262 for field in spec_child.children(&mut spec_child.walk()) {
1263 if field.kind() == "field_declaration" {
1264 let has_name = field.child_by_field_name("name").is_some();
1266 if !has_name {
1267 if let Some(type_node) =
1268 field.child_by_field_name("type")
1269 {
1270 if let Ok(name) =
1271 type_node.utf8_text(source_code.as_bytes())
1272 {
1273 implements.push(name.to_owned());
1274 }
1275 }
1276 }
1277 }
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284 },
1285 Language::Java => {
1286 if node.kind() == "class_declaration" {
1288 for child in node.children(&mut node.walk()) {
1289 if child.kind() == "superclass" {
1290 for type_node in child.children(&mut child.walk()) {
1291 if type_node.kind() == "type_identifier" {
1292 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1293 extends = Some(name.to_owned());
1294 }
1295 }
1296 }
1297 } else if child.kind() == "super_interfaces" {
1298 for type_list in child.children(&mut child.walk()) {
1299 if type_list.kind() == "type_list" {
1300 for type_node in type_list.children(&mut type_list.walk()) {
1301 if type_node.kind() == "type_identifier" {
1302 if let Ok(name) =
1303 type_node.utf8_text(source_code.as_bytes())
1304 {
1305 implements.push(name.to_owned());
1306 }
1307 }
1308 }
1309 }
1310 }
1311 }
1312 }
1313 }
1314 },
1315 Language::C | Language::Cpp => {
1316 if node.kind() == "class_specifier" || node.kind() == "struct_specifier" {
1318 for child in node.children(&mut node.walk()) {
1319 if child.kind() == "base_class_clause" {
1320 for base in child.children(&mut child.walk()) {
1321 if base.kind() == "type_identifier" {
1322 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1323 if extends.is_none() {
1324 extends = Some(name.to_owned());
1325 } else {
1326 implements.push(name.to_owned());
1327 }
1328 }
1329 }
1330 }
1331 }
1332 }
1333 }
1334 },
1335 Language::CSharp => {
1336 if node.kind() == "class_declaration" {
1338 for child in node.children(&mut node.walk()) {
1339 if child.kind() == "base_list" {
1340 for base in child.children(&mut child.walk()) {
1341 if base.kind() == "identifier" || base.kind() == "generic_name" {
1342 if let Ok(name) = base.utf8_text(source_code.as_bytes()) {
1343 if name.starts_with('I') && name.len() > 1 {
1344 implements.push(name.to_owned());
1346 } else if extends.is_none() {
1347 extends = Some(name.to_owned());
1348 } else {
1349 implements.push(name.to_owned());
1350 }
1351 }
1352 }
1353 }
1354 }
1355 }
1356 }
1357 },
1358 Language::Ruby => {
1359 if node.kind() == "class" {
1361 for child in node.children(&mut node.walk()) {
1362 if child.kind() == "superclass" {
1363 for type_node in child.children(&mut child.walk()) {
1364 if type_node.kind() == "constant" {
1365 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1366 extends = Some(name.to_owned());
1367 }
1368 }
1369 }
1370 }
1371 }
1372 }
1373 },
1374 Language::Php => {
1375 if node.kind() == "class_declaration" {
1377 for child in node.children(&mut node.walk()) {
1378 if child.kind() == "base_clause" {
1379 for type_node in child.children(&mut child.walk()) {
1380 if type_node.kind() == "name" {
1381 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1382 extends = Some(name.to_owned());
1383 }
1384 }
1385 }
1386 } else if child.kind() == "class_interface_clause" {
1387 for type_node in child.children(&mut child.walk()) {
1388 if type_node.kind() == "name" {
1389 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1390 implements.push(name.to_owned());
1391 }
1392 }
1393 }
1394 }
1395 }
1396 }
1397 },
1398 Language::Kotlin => {
1399 if node.kind() == "class_declaration" {
1401 for child in node.children(&mut node.walk()) {
1402 if child.kind() == "delegation_specifiers" {
1403 for spec in child.children(&mut child.walk()) {
1404 if spec.kind() == "delegation_specifier" {
1405 for type_node in spec.children(&mut spec.walk()) {
1406 if type_node.kind() == "user_type" {
1407 if let Ok(name) =
1408 type_node.utf8_text(source_code.as_bytes())
1409 {
1410 if extends.is_none() {
1411 extends = Some(name.to_owned());
1412 } else {
1413 implements.push(name.to_owned());
1414 }
1415 }
1416 }
1417 }
1418 }
1419 }
1420 }
1421 }
1422 }
1423 },
1424 Language::Swift => {
1425 if node.kind() == "class_declaration" {
1427 for child in node.children(&mut node.walk()) {
1428 if child.kind() == "type_inheritance_clause" {
1429 for type_node in child.children(&mut child.walk()) {
1430 if type_node.kind() == "type_identifier" {
1431 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1432 if extends.is_none() {
1433 extends = Some(name.to_owned());
1434 } else {
1435 implements.push(name.to_owned());
1436 }
1437 }
1438 }
1439 }
1440 }
1441 }
1442 }
1443 },
1444 Language::Scala => {
1445 if node.kind() == "class_definition" {
1447 for child in node.children(&mut node.walk()) {
1448 if child.kind() == "extends_clause" {
1449 for type_node in child.children(&mut child.walk()) {
1450 if type_node.kind() == "type_identifier" {
1451 if let Ok(name) = type_node.utf8_text(source_code.as_bytes()) {
1452 if extends.is_none() {
1453 extends = Some(name.to_owned());
1454 } else {
1455 implements.push(name.to_owned());
1456 }
1457 }
1458 }
1459 }
1460 }
1461 }
1462 }
1463 },
1464 Language::Bash
1465 | Language::Haskell
1466 | Language::Elixir
1467 | Language::Clojure
1468 | Language::OCaml
1469 | Language::FSharp
1470 | Language::Lua
1471 | Language::R => {},
1472 }
1473
1474 (extends, implements)
1475}
1476
1477pub fn map_symbol_kind(capture_name: &str) -> SymbolKind {
1479 match capture_name {
1480 "function" => SymbolKind::Function,
1481 "class" => SymbolKind::Class,
1482 "method" => SymbolKind::Method,
1483 "struct" => SymbolKind::Struct,
1484 "enum" => SymbolKind::Enum,
1485 "interface" => SymbolKind::Interface,
1486 "trait" => SymbolKind::Trait,
1487 _ => SymbolKind::Function,
1488 }
1489}