1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use crate::utils::hash::{content_hash, structural_hash};
10use languages::{get_all_code_extensions, get_language_config};
11use entity_extractor::extract_entities;
12
/// Stateless plugin type for source-code files; its `SemanticParserPlugin`
/// implementation in this file reports the plugin id `"code"`.
13pub struct CodeParserPlugin;
14
// Thread-local map from a `&'static str` key to a `tree_sitter::Parser`.
// `parse_tree` indexes it by `config.id`, so a parser is constructed at most
// once per language id on each thread and reused for later parses.
15thread_local! {
18 static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
19}
20
21fn language_config_for_content(
22 content: &str,
23 file_path: &str,
24) -> Option<&'static languages::LanguageConfig> {
25 let ext = std::path::Path::new(file_path)
26 .extension()
27 .and_then(|e| e.to_str())
28 .map(|e| format!(".{}", e.to_lowercase()))
29 .unwrap_or_default();
30
31 get_language_config(&ext).or_else(|| {
32 detect_ext_from_content(content).and_then(|shebang_ext| get_language_config(&shebang_ext))
33 })
34}
35
36fn parse_tree(
37 config: &'static languages::LanguageConfig,
38 content: &str,
39) -> Option<tree_sitter::Tree> {
40 let language = (config.get_language)()?;
41
42 PARSER_CACHE.with(|cache| {
43 let mut cache = cache.borrow_mut();
44 let parser = cache.entry(config.id).or_insert_with(|| {
45 let mut p = tree_sitter::Parser::new();
46 let _ = p.set_language(&language);
47 p
48 });
49
50 parser.parse(content.as_bytes(), None)
51 })
52}
53
54fn has_non_comment_content(node: tree_sitter::Node, source: &[u8]) -> bool {
55 let mut worklist = Vec::new();
56 let mut cursor = node.walk();
57 worklist.extend(node.children(&mut cursor));
58
59 while let Some(node) = worklist.pop() {
60 if is_comment_node(node.kind()) {
61 continue;
62 }
63
64 if node.child_count() == 0 {
65 let start = node.start_byte();
66 let end = node.end_byte();
67 if start < end
68 && end <= source.len()
69 && source[start..end].iter().any(|b| !b.is_ascii_whitespace())
70 {
71 return true;
72 }
73 continue;
74 }
75
76 let mut cursor = node.walk();
77 worklist.extend(node.children(&mut cursor));
78 }
79
80 false
81}
82
/// Returns `true` when `kind` is one of the node kind names this module treats
/// as a comment.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];

    COMMENT_KINDS.iter().any(|known| *known == kind)
}
89
/// Extracts the shebang text from content that begins with `#!`.
///
/// Returns the text between the leading `#!` and the end of the first line,
/// line terminator excluded. Returns `None` when `content` does not start with
/// `#!`; a bare `#!` with nothing after it yields `Some("")`.
fn shebang_line(content: &str) -> Option<&str> {
    let after_marker = content.strip_prefix("#!")?;
    let first_line = after_marker.lines().next().unwrap_or("");
    Some(first_line)
}
95
96impl SemanticParserPlugin for CodeParserPlugin {
97 fn id(&self) -> &str {
98 "code"
99 }
100
101 fn extensions(&self) -> &[&str] {
102 get_all_code_extensions()
103 }
104
105 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
106 self.extract_entities_with_tree(content, file_path).0
107 }
108
109 fn extract_entities_with_tree(
110 &self,
111 content: &str,
112 file_path: &str,
113 ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
114 let Some(config) = language_config_for_content(content, file_path) else {
115 return (Vec::new(), None);
116 };
117
118 let Some(tree) = parse_tree(config, content) else {
119 return (Vec::new(), None);
120 };
121
122 let entities = extract_entities(&tree, file_path, config, content);
123 (entities, Some(tree))
124 }
125
126 fn structural_hash_content(&self, content: &str, file_path: &str) -> Option<String> {
127 let config = language_config_for_content(content, file_path)?;
128 let tree = parse_tree(config, content)?;
129 let shebang = shebang_line(content);
130 if shebang.is_none() && !has_non_comment_content(tree.root_node(), content.as_bytes()) {
131 return Some(String::new());
132 }
133 let structural = structural_hash(tree.root_node(), content.as_bytes());
134 match shebang {
135 Some(shebang) => Some(content_hash(&format!("shebang:{shebang}\n{structural}"))),
136 None => Some(structural),
137 }
138 }
139}
140
141use crate::parser::registry::detect_ext_from_content;
142
143#[cfg(test)]
144mod tests {
145 use super::*;
146
// Checks that a Java source yields entities named after its class, interface
// and enum, and that the first entity of type "field" is named "name".
147 #[test]
148 fn test_java_entity_extraction() {
149 let code = r#"
150package com.example;
151
152import java.util.List;
153
154public class UserService {
155 private String name;
156
157 public UserService(String name) {
158 this.name = name;
159 }
160
161 public List<User> getUsers() {
162 return db.findAll();
163 }
164
165 public void createUser(User user) {
166 db.save(user);
167 }
168}
169
170interface Repository<T> {
171 T findById(String id);
172 List<T> findAll();
173}
174
175enum Status {
176 ACTIVE,
177 INACTIVE,
178 DELETED
179}
180"#;
181 let plugin = CodeParserPlugin;
182 let entities = plugin.extract_entities(code, "UserService.java");
183 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
184 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
    // Printed for debugging; visible when the test fails or runs with --nocapture.
185 eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
186
187 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
188 assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
189 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
190
    // The field entity must carry the declarator name ("name"), per the assertion message.
191 let field = entities
194 .iter()
195 .find(|e| e.entity_type == "field")
196 .expect("should extract the field entity");
197 assert_eq!(field.name, "name", "field should be named by its declarator, got: {:?}", field.name);
198 }
199
// Checks that methods declared inside a Java class are extracted by name and
// that the `add` method carries a parent_id.
200 #[test]
201 fn test_java_nested_methods() {
202 let code = r#"
203public class Calculator {
204 public int add(int a, int b) {
205 return a + b;
206 }
207
208 public int subtract(int a, int b) {
209 return a - b;
210 }
211}
212"#;
213 let plugin = CodeParserPlugin;
214 let entities = plugin.extract_entities(code, "Calculator.java");
215 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
216 eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
217
218 assert!(names.contains(&"Calculator"), "Should find Calculator class");
219 assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
220 assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
221
222 let add = entities.iter().find(|e| e.name == "add").unwrap();
    // Only the presence of a parent is asserted, not which entity it points to.
224 assert!(add.parent_id.is_some(), "add should have parent_id");
225 }
226
// Checks that C functions, a named struct and a named enum are extracted by
// name. The typedef'd anonymous struct `Person` is part of the fixture but is
// not asserted on.
227 #[test]
228 fn test_c_entity_extraction() {
229 let code = r#"
230#include <stdio.h>
231
232struct Point {
233 int x;
234 int y;
235};
236
237enum Color {
238 RED,
239 GREEN,
240 BLUE
241};
242
243typedef struct {
244 char name[50];
245 int age;
246} Person;
247
248void greet(const char* name) {
249 printf("Hello, %s!\n", name);
250}
251
252int add(int a, int b) {
253 return a + b;
254}
255
256int main() {
257 greet("world");
258 return 0;
259}
260"#;
261 let plugin = CodeParserPlugin;
262 let entities = plugin.extract_entities(code, "main.c");
263 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
264 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
265 eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
266
267 assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
268 assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
269 assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
270 assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
271 assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
272 }
273
// Checks that file-scope C declarations (a global, a prototype, a function)
// are extracted while variables declared inside a function body are not.
274 #[test]
275 fn test_c_function_locals_not_extracted() {
276 let code = r#"
277int global_count = 0;
278int helper(void);
279
280int main(void) {
281 int local = helper();
282 const char *message = "hello";
283 return local + global_count;
284}
285"#;
286 let plugin = CodeParserPlugin;
287 let entities = plugin.extract_entities(code, "main.c");
288 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
289
    // File-scope names must be present...
290 assert!(names.contains(&"global_count"), "got: {:?}", names);
291 assert!(names.contains(&"helper"), "got: {:?}", names);
292 assert!(names.contains(&"main"), "got: {:?}", names);
    // ...and function-body locals must be absent.
293 assert!(!names.contains(&"local"), "got: {:?}", names);
294 assert!(!names.contains(&"message"), "got: {:?}", names);
295 }
296
// Checks that a C++ namespace, a class nested in it and a free function are
// all extracted by name from a `.cpp` file.
297 #[test]
298 fn test_cpp_entity_extraction() {
299 let code = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
300 let plugin = CodeParserPlugin;
301 let entities = plugin.extract_entities(code, "main.cpp");
302 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
303 assert!(names.contains(&"math"), "got: {:?}", names);
304 assert!(names.contains(&"Vector3"), "got: {:?}", names);
305 assert!(names.contains(&"greet"), "got: {:?}", names);
306 }
307
// Checks that file-scope C++ declarations are extracted while function-body
// locals, including a local lambda and a variable declared inside that lambda,
// are not.
308 #[test]
309 fn test_cpp_function_locals_not_extracted() {
310 let code = r#"
311int global_value = 1;
312int helper();
313
314int main() {
315 int local = helper();
316 auto lambda = []() {
317 int lambda_local = 3;
318 return lambda_local;
319 };
320 return local + lambda();
321}
322"#;
323 let plugin = CodeParserPlugin;
324 let entities = plugin.extract_entities(code, "main.cpp");
325 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
326
327 assert!(names.contains(&"global_value"), "got: {:?}", names);
328 assert!(names.contains(&"helper"), "got: {:?}", names);
329 assert!(names.contains(&"main"), "got: {:?}", names);
330 assert!(!names.contains(&"local"), "got: {:?}", names);
331 assert!(!names.contains(&"lambda"), "got: {:?}", names);
332 assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
333 }
334
// Checks that a Ruby module, a class nested in it and a top-level method are
// extracted by name. The nested `greet` method is not asserted on.
335 #[test]
336 fn test_ruby_entity_extraction() {
337 let code = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
338 let plugin = CodeParserPlugin;
339 let entities = plugin.extract_entities(code, "auth.rb");
340 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
341 assert!(names.contains(&"Auth"), "got: {:?}", names);
342 assert!(names.contains(&"User"), "got: {:?}", names);
343 assert!(names.contains(&"helper"), "got: {:?}", names);
344 }
345
// Checks that a C# namespace plus the class and enum declared inside it are
// extracted by name from a `.cs` file.
346 #[test]
347 fn test_csharp_entity_extraction() {
348 let code = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
349 let plugin = CodeParserPlugin;
350 let entities = plugin.extract_entities(code, "Models.cs");
351 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
352 assert!(names.contains(&"MyApp"), "got: {:?}", names);
353 assert!(names.contains(&"User"), "got: {:?}", names);
354 assert!(names.contains(&"Role"), "got: {:?}", names);
355 }
356
// Broad Swift extraction test: checks names, entity types, parent links and
// exact entity ids for a typealias, a custom operator, a class with init/deinit,
// a struct with a subscript, an enum, a protocol with an associatedtype, and a
// free function.
357 #[test]
358 fn test_swift_entity_extraction() {
359 let code = r#"
360import Foundation
361
362typealias Handler = (Int) -> Void
363
364prefix operator ~~~
365
366class UserService {
367 var name: String
368
369 init(name: String) {
370 self.name = name
371 }
372
373 deinit {
374 print("freed")
375 }
376
377 func getUsers() -> [User] {
378 return db.findAll()
379 }
380}
381
382struct Point {
383 var x: Double
384 var y: Double
385
386 subscript(index: Int) -> Double {
387 return x + y + Double(index)
388 }
389}
390
391enum Status {
392 case active
393 case inactive
394 case deleted
395}
396
397protocol Repository {
398 associatedtype Canvas
399 func findById(id: String) -> Canvas?
400 func findAll() -> [Canvas]
401}
402
403func helper(x: Int) -> Int {
404 return x * 2
405}
406"#;
407 let plugin = CodeParserPlugin;
408 let entities = plugin.extract_entities(code, "UserService.swift");
409 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
410 eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
411
    // Presence checks by name.
412 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
413 assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
414 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
415 assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
416 assert!(names.contains(&"Canvas"), "Should find associatedtype Canvas, got: {:?}", names);
417 assert!(names.contains(&"Handler"), "Should find typealias Handler, got: {:?}", names);
418 assert!(names.contains(&"~~~"), "Should find custom operator ~~~, got: {:?}", names);
419 assert!(names.contains(&"init"), "Should find initializer init, got: {:?}", names);
420 assert!(names.contains(&"deinit"), "Should find deinitializer deinit, got: {:?}", names);
421 assert!(names.contains(&"subscript"), "Should find subscript, got: {:?}", names);
422 assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
423
    // Top-level declarations have no parent.
424 let handler = entities.iter().find(|e| e.name == "Handler").unwrap();
425 assert_eq!(handler.entity_type, "type");
426 assert!(handler.parent_id.is_none());
427
428 let operator = entities.iter().find(|e| e.name == "~~~").unwrap();
429 assert_eq!(operator.entity_type, "operator");
430 assert!(operator.parent_id.is_none());
431
432 let user_service = entities.iter().find(|e| e.name == "UserService").unwrap();
433 assert_eq!(user_service.entity_type, "class");
434
    // Members point at their container and their ids extend the container's id path.
435 let initializer = entities.iter().find(|e| e.name == "init").unwrap();
436 assert_eq!(initializer.entity_type, "init");
437 assert_eq!(initializer.parent_id.as_deref(), Some(user_service.id.as_str()));
438 assert_eq!(initializer.id, "UserService.swift::class::UserService::init");
439
440 let deinitializer = entities.iter().find(|e| e.name == "deinit").unwrap();
441 assert_eq!(deinitializer.entity_type, "deinit");
442 assert_eq!(deinitializer.parent_id.as_deref(), Some(user_service.id.as_str()));
443 assert_eq!(
444 deinitializer.id,
445 "UserService.swift::class::UserService::deinit"
446 );
447
448 let point = entities.iter().find(|e| e.name == "Point").unwrap();
449 assert_eq!(point.entity_type, "struct");
450
451 let subscript = entities.iter().find(|e| e.name == "subscript").unwrap();
452 assert_eq!(subscript.entity_type, "subscript");
453 assert_eq!(subscript.parent_id.as_deref(), Some(point.id.as_str()));
454 assert_eq!(
455 subscript.id,
456 "UserService.swift::struct::Point::subscript"
457 );
458
459 let status = entities.iter().find(|e| e.name == "Status").unwrap();
460 assert_eq!(status.entity_type, "enum");
461
462 let repository = entities.iter().find(|e| e.name == "Repository").unwrap();
463 assert_eq!(repository.entity_type, "protocol");
464 assert_eq!(repository.id, "UserService.swift::protocol::Repository");
465
466 let canvas = entities.iter().find(|e| e.name == "Canvas").unwrap();
467 assert_eq!(canvas.entity_type, "associatedtype");
468 assert_eq!(canvas.parent_id.as_deref(), Some(repository.id.as_str()));
469 assert_eq!(
470 canvas.id,
471 "UserService.swift::protocol::Repository::Canvas"
472 );
473 }
474
// Checks that a Swift declaration binding several names (`var x, y: Int`)
// produces one "property" entity per name, in source order, each parented to
// the struct and each with content that spells out the shared type.
475 #[test]
476 fn test_swift_multi_binding_property_extraction() {
477 let code = r#"
478struct Point {
479 var x, y: Int
480}
481"#;
482 let plugin = CodeParserPlugin;
483 let entities = plugin.extract_entities(code, "Point.swift");
484 let point = entities.iter().find(|e| e.name == "Point").unwrap();
485 let properties: Vec<_> = entities
486 .iter()
487 .filter(|e| e.entity_type == "property")
488 .collect();
489
490 assert_eq!(
491 properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
492 vec!["x", "y"]
493 );
494 assert!(properties
495 .iter()
496 .all(|property| property.parent_id.as_deref() == Some(point.id.as_str())));
    // `x` has no annotation of its own in the source; its content is expected to include `: Int`.
497 assert_eq!(properties[0].content, "var x: Int");
498 assert_eq!(properties[1].content, "var y: Int");
499 }
500
// Checks the `content` of each property entity produced from multi-binding
// Swift declarations across several shapes: per-binding types, mixed shared and
// own types, generic types containing commas, initializer expressions, `let`
// constants, and two declarations separated by a semicolon.
501 #[test]
502 fn test_swift_multi_binding_property_content_is_per_binding() {
    // Each binding has its own type annotation.
503 let typed_code = r#"
504struct Types {
505 var x: Int, y: String
506}
507"#;
508 let plugin = CodeParserPlugin;
509 let typed_entities = plugin.extract_entities(typed_code, "Types.swift");
510 let typed_properties: Vec<_> = typed_entities
511 .iter()
512 .filter(|e| e.entity_type == "property")
513 .collect();
514 assert_eq!(typed_properties[0].content, "var x: Int");
515 assert_eq!(typed_properties[1].content, "var y: String");
516
    // `x` shares `y`'s annotation while `z` has its own.
517 let mixed_code = r#"
518struct Mixed {
519 var x, y: Int, z: String
520}
521"#;
522 let mixed_entities = plugin.extract_entities(mixed_code, "Mixed.swift");
523 let mixed_properties: Vec<_> = mixed_entities
524 .iter()
525 .filter(|e| e.entity_type == "property")
526 .collect();
527 assert_eq!(mixed_properties[0].content, "var x: Int");
528 assert_eq!(mixed_properties[1].content, "var y: Int");
529 assert_eq!(mixed_properties[2].content, "var z: String");
530
    // The comma inside the generic argument list must not split the binding.
531 let generic_code = r#"
532struct GenericTypes {
533 var lookup: Dictionary<String, Int>, count: Int
534}
535"#;
536 let generic_entities = plugin.extract_entities(generic_code, "GenericTypes.swift");
537 let generic_properties: Vec<_> = generic_entities
538 .iter()
539 .filter(|e| e.entity_type == "property")
540 .collect();
541 assert_eq!(
542 generic_properties[0].content,
543 "var lookup: Dictionary<String, Int>"
544 );
545 assert_eq!(generic_properties[1].content, "var count: Int");
546
    // Each binding's content holds only its own initializer expression.
547 let initializer_code = r#"
548struct Initializers {
549 var a = Foo(), b = Bar()
550}
551"#;
552 let initializer_entities = plugin.extract_entities(initializer_code, "Initializers.swift");
553 let initializer_properties: Vec<_> = initializer_entities
554 .iter()
555 .filter(|e| e.entity_type == "property")
556 .collect();
557 assert!(initializer_properties[0].content.contains("Foo()"));
558 assert!(!initializer_properties[0].content.contains("Bar()"));
559 assert!(initializer_properties[1].content.contains("Bar()"));
560 assert!(!initializer_properties[1].content.contains("Foo()"));
561
    // `let` declarations keep the `let` keyword in each binding's content.
562 let constants_code = r#"
563struct Constants {
564 let first, second, third: Int
565}
566"#;
567 let constants_entities = plugin.extract_entities(constants_code, "Constants.swift");
568 let constants_properties: Vec<_> = constants_entities
569 .iter()
570 .filter(|e| e.entity_type == "property")
571 .collect();
572 assert_eq!(
573 constants_properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
574 vec!["first", "second", "third"]
575 );
576 assert_eq!(constants_properties[0].content, "let first: Int");
577 assert_eq!(constants_properties[1].content, "let second: Int");
578 assert_eq!(constants_properties[2].content, "let third: Int");
579
    // A second declaration after `;` on the same line stays a separate property.
580 let semicolon_code = r#"
581struct Semicolons {
582 var left, right: Int; var next: Int
583}
584"#;
585 let semicolon_entities = plugin.extract_entities(semicolon_code, "Semicolons.swift");
586 let semicolon_properties: Vec<_> = semicolon_entities
587 .iter()
588 .filter(|e| e.entity_type == "property")
589 .collect();
590 assert_eq!(semicolon_properties[0].content, "var left: Int");
591 assert_eq!(semicolon_properties[1].content, "var right: Int");
592 assert_eq!(semicolon_properties[2].content, "var next: Int");
593 }
594
// Checks that `let`/`var` bindings inside Swift bodies (computed properties,
// getters, init, methods, nested functions, subscripts, deinit) are not
// extracted, while members and nested functions are.
595 #[test]
596 fn test_swift_body_locals_not_extracted_as_properties() {
597 let code = r#"
598class Cache {
599 var stored: Int
600
601 var computed: Int {
602 let computedLocal = stored + 1
603 func computedNested() -> Int {
604 return computedLocal
605 }
606 return computedNested()
607 }
608
609 var explicit: Int {
610 get {
611 let getterLocal = stored
612 func getterNested() -> Int {
613 return getterLocal
614 }
615 return getterNested()
616 }
617 }
618
619 init(seed: Int) {
620 let initial = seed
621 self.stored = initial
622 }
623
624 func value() -> Int {
625 let doubled = stored * 2
626 var offset = doubled + 1
627 func nested() -> Int {
628 let insideNested = offset
629 return insideNested
630 }
631 return nested()
632 }
633
634 subscript(index: Int) -> Int {
635 let shifted = index + stored
636 func subscriptNested() -> Int {
637 return shifted
638 }
639 return subscriptNested()
640 }
641
642 deinit {
643 let closing = stored
644 _ = closing
645 }
646}
647"#;
648 let plugin = CodeParserPlugin;
649 let entities = plugin.extract_entities(code, "Cache.swift");
650 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
651
    // Members and nested functions are expected as entities.
652 assert!(names.contains(&"Cache"), "got: {:?}", names);
653 assert!(names.contains(&"stored"), "got: {:?}", names);
654 assert!(names.contains(&"computed"), "got: {:?}", names);
655 assert!(names.contains(&"explicit"), "got: {:?}", names);
656 assert!(names.contains(&"init"), "got: {:?}", names);
657 assert!(names.contains(&"value"), "got: {:?}", names);
658 assert!(names.contains(&"computedNested"), "got: {:?}", names);
659 assert!(names.contains(&"getterNested"), "got: {:?}", names);
660 assert!(names.contains(&"nested"), "got: {:?}", names);
661 assert!(names.contains(&"subscriptNested"), "got: {:?}", names);
662 assert!(names.contains(&"subscript"), "got: {:?}", names);
663 assert!(names.contains(&"deinit"), "got: {:?}", names);
    // A type name appearing in an annotation must never be taken as an entity name.
664 assert!(!names.contains(&"Int"), "got: {:?}", names);
665
    // None of the body-local bindings may surface as entities.
666 for local in [
667 "computedLocal",
668 "getterLocal",
669 "initial",
670 "doubled",
671 "offset",
672 "insideNested",
673 "shifted",
674 "closing",
675 ] {
676 assert!(!names.contains(&local), "{local} should not be an entity. Got: {:?}", names);
677 }
678 }
679
// Checks that functions declared inside the closure initializers of a local
// multi-binding `let` are still extracted, even though the local bindings
// themselves (`a`, `b`) are not.
681 #[test]
682 fn test_swift_suppressed_multi_binding_initializers_are_traversed() {
683 let code = r#"
684func outer() {
685 let a = { func innerA() -> Int { 1 } },
686 b = { func innerB() -> Int { 2 } }
687}
688"#;
689 let plugin = CodeParserPlugin;
690 let entities = plugin.extract_entities(code, "Locals.swift");
691 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
692
693 assert!(names.contains(&"outer"), "got: {:?}", names);
694 assert!(names.contains(&"innerA"), "got: {:?}", names);
695 assert!(names.contains(&"innerB"), "got: {:?}", names);
696 assert!(!names.contains(&"a"), "local binding should stay suppressed: {:?}", names);
697 assert!(!names.contains(&"b"), "local binding should stay suppressed: {:?}", names);
698 }
698
// Checks that a Swift struct containing `#if`/`#else`/`#endif` members is
// still extracted as a top-level struct with its members parented to it. The
// fixture also places `}` characters in a string, a multi-line string and a
// block comment.
700 #[test]
701 fn test_swift_conditional_compilation_inside_struct() {
702 let code = r#"
703import ArgumentParser
704
705public struct TuistCommand: AsyncParsableCommand {
706 public init() {}
707
708 public static var configuration: CommandConfiguration {
709 let comment = "brace in string }"
710 let multiline = """
711 brace in multiline }
712 escaped \"""
713 """
714 /* brace in comment } */
715 CommandConfiguration(commandName: "tuist")
716 }
717
718 #if os(macOS)
719 public static var groupedSubcommands: [ParsableCommand.Type] {
720 [InstallCommand.self]
721 }
722 #else
723 public static var groupedSubcommands: [ParsableCommand.Type] {
724 []
725 }
726 #endif
727
728 public func run() async throws {}
729}
730"#;
731 let plugin = CodeParserPlugin;
732 let entities = plugin.extract_entities(code, "TuistCommand.swift");
733 eprintln!(
734 "Swift conditional entities: {:?}",
735 entities
736 .iter()
737 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
738 .collect::<Vec<_>>()
739 );
740
741 let command = entities
742 .iter()
743 .find(|e| e.name == "TuistCommand")
744 .expect("Should recover TuistCommand struct");
745 assert_eq!(command.entity_type, "struct");
746 assert!(command.parent_id.is_none());
747
    // Renaming the struct must leave its structural hash unchanged.
748 let renamed_code = code.replace("TuistCommand", "RenamedCommand");
749 let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
750 let renamed_command = renamed_entities
751 .iter()
752 .find(|e| e.name == "RenamedCommand")
753 .expect("Should recover renamed command struct");
754 assert_eq!(command.structural_hash, renamed_command.structural_hash);
755
756 for member in ["init", "configuration", "run"] {
757 let entity = entities
758 .iter()
759 .find(|e| e.name == member)
760 .unwrap_or_else(|| panic!("Should find {member}"));
761 assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
762 }
763
    // Both the `#if` and the `#else` variant of the property are expected as entities.
764 let grouped_subcommands: Vec<_> = entities
765 .iter()
766 .filter(|e| e.name == "groupedSubcommands")
767 .collect();
768 assert_eq!(grouped_subcommands.len(), 2);
769 assert!(grouped_subcommands
770 .iter()
771 .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
772 }
772
// Checks that Swift classes combining `#if` blocks with string literals whose
// interpolations contain brace characters are still extracted as top-level
// classes, with `tpl`, `dump` and `render` parented to them. Four literal
// forms are covered: plain, raw (`#"..."#`), multi-line, and an interpolation
// containing a closure.
774 #[test]
775 fn test_swift_conditional_compilation_with_interpolated_brace_string() {
776 let plugin = CodeParserPlugin;
    // Each case pairs the expected container name with its Swift source.
777 for (container_name, code) in [
778 (
779 "Config",
780 r#"
781class Config {
782 let tpl = "prefix \("}") suffix"
783#if DEBUG
784 func dump() { print(tpl) }
785#endif
786 func render() -> String { return tpl }
787}
788
789struct Tail { let q: Int }
790"#,
791 ),
792 (
793 "RawConfig",
794 r##"
795class RawConfig {
796 let tpl = #"prefix \#("{") suffix"#
797#if DEBUG
798 func dump() { print(tpl) }
799#endif
800 func render() -> String { return tpl }
801}
802"##,
803 ),
804 (
805 "MultilineConfig",
806 r#"
807class MultilineConfig {
808 let tpl = """
809 prefix \("}") suffix
810 """
811#if DEBUG
812 func dump() { print(tpl) }
813#endif
814 func render() -> String { return tpl }
815}
816"#,
817 ),
818 (
819 "ClosureConfig",
820 r#"
821class ClosureConfig {
822 let tpl = "prefix \(["}"].map { $0 }.joined()) suffix"
823#if DEBUG
824 func dump() { print(tpl) }
825#endif
826 func render() -> String { return tpl }
827}
828"#,
829 ),
830 ] {
831 let file_path = format!("{container_name}.swift");
832 let entities = plugin.extract_entities(code, &file_path);
833 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
834 let container = entities
835 .iter()
836 .find(|e| e.name == container_name)
837 .unwrap_or_else(|| {
838 panic!("Should recover {container_name}, got: {names:?}");
839 });
840 assert_eq!(container.entity_type, "class");
841 assert!(container.parent_id.is_none());
842
843 for member in ["tpl", "dump", "render"] {
844 let entity = entities
845 .iter()
846 .find(|e| e.name == member)
847 .unwrap_or_else(|| {
848 panic!("Should find {member} in {container_name}, got: {names:?}");
849 });
850 assert_eq!(entity.parent_id.as_deref(), Some(container.id.as_str()));
851 }
852 }
853 }
853
// Checks that Elixir `defmodule`, `def`, `defp`, `defmacro` and `defprotocol`
// forms are extracted by name and that a function inside a module carries a
// parent_id. The `defguard` and `defimpl` forms are in the fixture but are not
// asserted on.
855 #[test]
856 fn test_elixir_entity_extraction() {
857 let code = r#"
858defmodule MyApp.Accounts do
859 def create_user(attrs) do
860 %User{}
861 |> User.changeset(attrs)
862 |> Repo.insert()
863 end
864
865 defp validate(attrs) do
866 # private helper
867 :ok
868 end
869
870 defmacro is_admin(user) do
871 quote do
872 unquote(user).role == :admin
873 end
874 end
875
876 defguard is_positive(x) when is_integer(x) and x > 0
877end
878
879defprotocol Printable do
880 def to_string(data)
881end
882
883defimpl Printable, for: Integer do
884 def to_string(i), do: Integer.to_string(i)
885end
886"#;
887 let plugin = CodeParserPlugin;
888 let entities = plugin.extract_entities(code, "accounts.ex");
889 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
890 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
891 eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
892
893 assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
894 assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
895 assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
896 assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
897 assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
898
    // Only the presence of a parent is asserted, not which entity it points to.
899 let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
900 assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
901 }
902
// Broad Clojure extraction test, compiled only with the `lang-clojure`
// feature: checks which top-level forms become entities, how they are named,
// and that all entity ids are unique.
903 #[test]
904 #[cfg(feature = "lang-clojure")]
905 fn test_clojure_entity_extraction() {
906 let code = r#"
907(ns my.app.core
908 (:require [clojure.string :as str]))
909
910(def my-var 42)
911
912(def ^:private secret "hunter2")
913
914(defonce connection (atom nil))
915
916(defn greet
917 "Returns a greeting string."
918 [name]
919 (str "Hello, " name "!"))
920
921(defmacro unless [pred & body]
922 `(when (not ~pred) ~@body))
923
924(defprotocol Greeter
925 (greet! [this name]))
926
927(defrecord Person [name age])
928
929(defmulti area :shape)
930
931(defmethod area :circle [{:keys [radius]}]
932 (* Math/PI radius radius))
933
934(defmethod area :rectangle [{:keys [width height]}]
935 (* width height))
936"#;
937 let plugin = CodeParserPlugin;
938 let entities = plugin.extract_entities(code, "core.clj");
939 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
940 eprintln!(
941 "Clojure entities: {:?}",
942 entities
943 .iter()
944 .map(|e| (&e.name, &e.entity_type))
945 .collect::<Vec<_>>()
946 );
947
948 assert!(!names.contains(&"my.app.core"), "Should not extract ns form as entity, got: {:?}", names);
949 assert!(names.contains(&"my-var"), "Should find def, got: {:?}", names);
950 assert!(names.contains(&"secret"), "Should strip ^:private metadata from name, got: {:?}", names);
951 assert!(names.contains(&"connection"), "Should find defonce, got: {:?}", names);
952 assert!(names.contains(&"greet"), "Should find defn, got: {:?}", names);
953 assert!(names.contains(&"unless"), "Should find defmacro, got: {:?}", names);
954 assert!(names.contains(&"Greeter"), "Should find defprotocol, got: {:?}", names);
955 assert!(names.contains(&"Person"), "Should find defrecord, got: {:?}", names);
956 assert!(names.contains(&"area"), "Should find defmulti, got: {:?}", names);
    // Each `defmethod` is expected under the name "<multimethod>/<dispatch value>".
957 assert!(names.contains(&"area/:circle"), "Should find defmethod area :circle, got: {:?}", names);
959 assert!(names.contains(&"area/:rectangle"), "Should find defmethod area :rectangle, got: {:?}", names);
    // Collecting ids into a set must not shrink the count, i.e. no id repeats.
960 let ids: Vec<&str> = entities.iter().map(|e| e.id.as_str()).collect();
961 assert!(ids.iter().collect::<std::collections::HashSet<_>>().len() == ids.len(),
962 "All entity IDs must be unique, got: {:?}", ids);
963 }
964
// Checks that a Clojure `defn-` form is extracted under its own name with
// entity type "function". Compiled only with the `lang-clojure` feature.
965 #[test]
966 #[cfg(feature = "lang-clojure")]
967 fn test_clojure_defn_private() {
968 let code = r#"
969(ns my.app)
970
971(defn- private-helper [x]
972 (* x 2))
973"#;
974 let plugin = CodeParserPlugin;
975 let entities = plugin.extract_entities(code, "app.clj");
976 let entity = entities
977 .iter()
978 .find(|e| e.name == "private-helper")
979 .expect("Should extract defn- as a function entity");
980 assert_eq!(entity.entity_type, "function");
981 }
982
// Checks that Clojure function names ending in `?` or `!` are extracted with
// the punctuation intact and typed as "function". Compiled only with the
// `lang-clojure` feature.
983 #[test]
984 #[cfg(feature = "lang-clojure")]
985 fn test_clojure_predicate_and_bang_functions() {
986 let code = r#"
987(ns my.app.validators)
988
989(defn empty? [coll]
990 (= 0 (count coll)))
991
992(defn reset! [state new-val]
993 (compare-and-set! state @state new-val))
994"#;
995 let plugin = CodeParserPlugin;
996 let entities = plugin.extract_entities(code, "validators.clj");
997 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
998 assert!(names.contains(&"empty?"), "Should extract predicate fn empty?, got: {:?}", names);
999 assert!(names.contains(&"reset!"), "Should extract bang fn reset!, got: {:?}", names);
1000 let empty_entity = entities.iter().find(|e| e.name == "empty?").unwrap();
1001 let reset_entity = entities.iter().find(|e| e.name == "reset!").unwrap();
1002 assert_eq!(empty_entity.entity_type, "function");
1003 assert_eq!(reset_entity.entity_type, "function");
1004 }
1005
// Checks that Clojure symbols containing `*` or `=` are extracted verbatim:
// `(def *db* ...)` as a "var" and `(defn not= ...)` as a "function". Compiled
// only with the `lang-clojure` feature.
1006 #[test]
1007 #[cfg(feature = "lang-clojure")]
1008 fn test_clojure_dynamic_vars_and_equality_fns() {
1009 let code = r#"
1010(ns my.app.core)
1011
1012(def *db* (atom nil))
1013
1014(defn not= [a b]
1015 (not (= a b)))
1016"#;
1017 let plugin = CodeParserPlugin;
1018 let entities = plugin.extract_entities(code, "core.clj");
1019 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1020 assert!(names.contains(&"*db*"), "Should extract dynamic var *db*, got: {:?}", names);
1021 assert!(names.contains(&"not="), "Should extract fn not=, got: {:?}", names);
1022 let db_entity = entities.iter().find(|e| e.name == "*db*").unwrap();
1023 let noteq_entity = entities.iter().find(|e| e.name == "not=").unwrap();
1024 assert_eq!(db_entity.entity_type, "var");
1025 assert_eq!(noteq_entity.entity_type, "function");
1026 }
1027
/// `deftype`, `definterface` and `defstruct` forms map to the entity types
/// `type`, `interface` and `struct` respectively.
#[test]
#[cfg(feature = "lang-clojure")]
fn test_clojure_deftype_definterface_defstruct() {
    let source = r#"
(ns my.app)

(deftype MyType [field])

(definterface IFoo
 (foo [this]))

(defstruct point :x :y)
"#;
    let entities = CodeParserPlugin.extract_entities(source, "app.clj");
    // Evaluated only when a lookup fails, to list what was actually extracted.
    let all_names = || entities.iter().map(|e| &e.name).collect::<Vec<_>>();

    let my_type = entities
        .iter()
        .find(|e| e.name == "MyType")
        .unwrap_or_else(|| panic!("Should extract deftype, got: {:?}", all_names()));
    assert_eq!(my_type.entity_type, "type");

    let interface = entities
        .iter()
        .find(|e| e.name == "IFoo")
        .unwrap_or_else(|| panic!("Should extract definterface, got: {:?}", all_names()));
    assert_eq!(interface.entity_type, "interface");

    let point = entities
        .iter()
        .find(|e| e.name == "point")
        .unwrap_or_else(|| panic!("Should extract defstruct, got: {:?}", all_names()));
    assert_eq!(point.entity_type, "struct");
}
1054
/// `defn` and `def` forms are extracted from a file with the `.cljc` extension.
#[test]
#[cfg(feature = "lang-clojure")]
fn test_clojure_cljc_extension() {
    let source = r#"
(ns my.app.shared)

(defn platform-key [] :default)

(def shared-value 99)
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "shared.cljc");

    let mut names: Vec<&str> = Vec::new();
    for entity in parsed.iter() {
        names.push(entity.name.as_str());
    }

    assert!(names.contains(&"platform-key"), "Should extract defn from .cljc, got: {:?}", names);
    assert!(names.contains(&"shared-value"), "Should extract def from .cljc, got: {:?}", names);
}
1071
/// `defmethod` forms dispatching on `nil`, a string and an integer are named
/// `<multi>/<dispatch>`, and every extracted entity ID is unique.
#[test]
#[cfg(feature = "lang-clojure")]
fn test_clojure_defmethod_non_keyword_dispatch() {
    let source = r#"
(ns my.app)

(defmulti process identity)

(defmethod process nil [_] :nothing)

(defmethod process "string" [s] s)

(defmethod process 42 [n] n)
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "app.clj");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    assert!(names.contains(&"process"), "Should extract defmulti, got: {:?}", names);
    assert!(names.contains(&"process/nil"), "Should extract defmethod with nil dispatch, got: {:?}", names);
    assert!(names.contains(&"process/\"string\""), "Should extract defmethod with string dispatch, got: {:?}", names);
    assert!(names.contains(&"process/42"), "Should extract defmethod with integer dispatch, got: {:?}", names);

    // Collapsing the IDs into a set must not drop any of them.
    let ids: Vec<&str> = parsed.iter().map(|entity| entity.id.as_str()).collect();
    let distinct_ids: std::collections::HashSet<&str> = ids.iter().copied().collect();
    assert!(
        distinct_ids.len() == ids.len(),
        "All entity IDs must be unique, got: {:?}", ids
    );
}
1099
/// Both Bash function syntaxes (`name() {}` and `function name {}`) are extracted,
/// and nothing else in the script becomes an entity.
#[test]
fn test_bash_entity_extraction() {
    let script = r#"#!/bin/bash

greet() {
 echo "Hello, $1!"
}

function deploy {
 echo "deploying..."
}

# not a function
echo "main script"
"#;
    let parsed = CodeParserPlugin.extract_entities(script, "deploy.sh");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    // Diagnostic dump of (name, type) pairs, visible with `--nocapture`.
    let name_type_pairs: Vec<(&str, &str)> = parsed
        .iter()
        .map(|entity| (entity.name.as_str(), entity.entity_type.as_str()))
        .collect();
    eprintln!("Bash entities: {:?}", name_type_pairs);

    assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
    assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
    assert_eq!(parsed.len(), 2, "Should only find functions, got: {:?}", names);
}
1125
/// An exported TypeScript function and an exported class are both extracted by name.
#[test]
fn test_typescript_entity_extraction() {
    let source = r#"
export function hello(): string {
 return "hello";
}

export class Greeter {
 greet(name: string): string {
 return `Hello, ${name}!`;
 }
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "test.ts");
    let has = |wanted: &str| parsed.iter().any(|entity| entity.name == wanted);

    assert!(has("hello"), "Should find hello function");
    assert!(has("Greeter"), "Should find Greeter class");
}
1146
/// Two same-named functions declared on one line get distinct IDs with `#1` / `#2` suffixes.
#[test]
fn test_same_line_typescript_overload_ids_are_unique() {
    let code = "function f(a: number): void {}; function f(a: string): void {}\n";
    let entities = CodeParserPlugin.extract_entities(code, "over.ts");

    // Keep only the `f` functions, in extraction order.
    let mut overloads: Vec<&SemanticEntity> = Vec::new();
    for entity in entities.iter() {
        if entity.name == "f" && entity.entity_type == "function" {
            overloads.push(entity);
        }
    }
    let ids: Vec<&str> = overloads.iter().map(|entity| entity.id.as_str()).collect();

    assert_eq!(overloads.len(), 2, "expected both overloads, got: {entities:?}");
    assert_eq!(ids, vec!["over.ts::function::f@L1#1", "over.ts::function::f@L1#2"]);
}
1161
/// When two same-named classes share a line, each method's `parent_id` and `id`
/// carry the disambiguated ID of its own class.
#[test]
fn test_same_line_duplicate_parent_ids_are_propagated_to_children() {
    let code = "class C { m(){ return 1 } } class C { m(){ return 2 } }\n";
    let entities = CodeParserPlugin.extract_entities(code, "c.ts");

    // Single pass that buckets the classes and their methods in extraction order.
    let mut classes: Vec<&SemanticEntity> = Vec::new();
    let mut methods: Vec<&SemanticEntity> = Vec::new();
    for entity in entities.iter() {
        if entity.name == "C" && entity.entity_type == "class" {
            classes.push(entity);
        }
        if entity.name == "m" && entity.entity_type == "method" {
            methods.push(entity);
        }
    }

    assert_eq!(classes.len(), 2, "expected both classes, got: {entities:?}");
    assert_eq!(methods.len(), 2, "expected both methods, got: {entities:?}");

    let (first_class, second_class) = (classes[0], classes[1]);
    let (first_method, second_method) = (methods[0], methods[1]);

    assert_eq!(first_class.id, "c.ts::class::C@L1#1");
    assert_eq!(second_class.id, "c.ts::class::C@L1#2");
    assert_eq!(first_method.parent_id.as_deref(), Some("c.ts::class::C@L1#1"));
    assert_eq!(second_method.parent_id.as_deref(), Some("c.ts::class::C@L1#2"));
    assert_eq!(first_method.id, "c.ts::class::C@L1#1::m");
    assert_eq!(second_method.id, "c.ts::class::C@L1#2::m");
}
1185
/// A function is extracted from a file with the `.mts` extension.
#[test]
fn test_module_typescript_entity_extraction() {
    let source = r#"
export function hello(): string {
 return "hello";
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "test.mts");
    let found_hello = parsed.iter().any(|entity| entity.name == "hello");

    assert!(found_hello, "Should find hello function");
}
1199
/// A class and its method are extracted from a file with the `.cts` extension.
#[test]
fn test_commonjs_typescript_entity_extraction() {
    let source = r#"
export class Greeter {
 greet(name: string): string {
 return `Hello, ${name}!`;
 }
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "test.cts");
    let has = |wanted: &str| parsed.iter().any(|entity| entity.name == wanted);

    assert!(has("Greeter"), "Should find Greeter class");
    assert!(has("greet"), "Should find greet method");
}
1216
/// An `async function*` declaration in TypeScript is extracted as a `function`.
#[test]
fn test_typescript_generator_function_entity_extraction() {
    let source = r#"
export async function* streamUsers(): AsyncGenerator<string> {
 yield "alice";
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "stream.ts");

    // On a miss, report every (name, type) pair that was extracted instead.
    let stream = entities
        .iter()
        .find(|e| e.name == "streamUsers")
        .unwrap_or_else(|| {
            panic!(
                "Should find generator function, got: {:?}",
                entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });

    assert_eq!(stream.entity_type, "function");
}
1231
/// A `function*` declaration in JavaScript is extracted as a `function`.
#[test]
fn test_javascript_generator_function_entity_extraction() {
    let source = r#"
export function* ids() {
 yield 1;
 yield 2;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "ids.js");

    // On a miss, report every (name, type) pair that was extracted instead.
    let generator = entities.iter().find(|e| e.name == "ids").unwrap_or_else(|| {
        panic!(
            "Should find generator function, got: {:?}",
            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
        )
    });

    assert_eq!(generator.entity_type, "function");
}
1247
/// A function declared inside another TypeScript function is extracted and has a `parent_id`.
#[test]
fn test_nested_functions_typescript() {
    let source = r#"
function outer() {
 function inner() {
 return 42;
 }
 return inner();
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "nested.ts");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();
    // Diagnostic dump, visible with `--nocapture`.
    eprintln!(
        "Nested TS: {:?}",
        parsed
            .iter()
            .map(|e| (&e.name, &e.entity_type, &e.parent_id))
            .collect::<Vec<_>>()
    );

    assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
    assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);

    let inner_has_parent = parsed
        .iter()
        .find(|entity| entity.name == "inner")
        .unwrap()
        .parent_id
        .is_some();
    assert!(inner_has_parent, "inner should have parent_id");
}
1269
/// Fields initialised with anonymous classes form a parent chain
/// `L1 -> L2 -> L3 -> L4 -> method`, and the method ID spells out that whole path.
#[test]
fn test_typescript_nested_anonymous_class_fields() {
    let source = r#"
class L1 {
 L2 = class {
 L3 = class {
 L4 = class {
 method() { return 1; }
 };
 };
 };
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "a.ts");
    // Lookup by name that reports every extracted entity when the name is absent.
    let find = |name: &str| {
        entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
            panic!(
                "missing {name}; got: {:?}",
                entities
                    .iter()
                    .map(|e| (&e.name, &e.entity_type, &e.parent_id))
                    .collect::<Vec<_>>()
            )
        })
    };

    let root = find("L1");
    assert_eq!(root.entity_type, "class");

    // Walk down the chain: each field's parent must be the entity one level up.
    let mut expected_parent = root.id.clone();
    for field_name in ["L2", "L3", "L4"] {
        let field = find(field_name);
        assert_eq!(field.entity_type, "field");
        assert_eq!(field.parent_id.as_deref(), Some(expected_parent.as_str()));
        expected_parent = field.id.clone();
    }

    let method = find("method");
    assert_eq!(method.entity_type, "method");
    assert_eq!(method.parent_id.as_deref(), Some(expected_parent.as_str()));
    assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
}
1321
/// A function defined inside another Python function is extracted and has a `parent_id`.
#[test]
fn test_nested_functions_python() {
    // Python nesting is expressed purely through indentation, so the snippet must indent
    // `inner` inside `outer` and `return 42` inside `inner`; with every line at the same
    // depth the source is not valid Python and does not describe a nested function.
    let code = "def outer():\n    def inner():\n        return 42\n    return inner()\n";
    let plugin = CodeParserPlugin;
    let entities = plugin.extract_entities(code, "nested.py");
    let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();

    assert!(names.contains(&"outer"), "got: {:?}", names);
    assert!(names.contains(&"inner"), "got: {:?}", names);

    let inner = entities.iter().find(|e| e.name == "inner").unwrap();
    assert!(inner.parent_id.is_some(), "inner should have parent_id");
}
1335
/// A `fn` declared inside another Rust `fn` is extracted and has a `parent_id`.
#[test]
fn test_nested_functions_rust() {
    let source = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
    let parsed = CodeParserPlugin.extract_entities(source, "nested.rs");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    for expected in ["outer", "inner"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }

    let inner_has_parent = parsed
        .iter()
        .find(|entity| entity.name == "inner")
        .unwrap()
        .parent_id
        .is_some();
    assert!(inner_has_parent, "inner should have parent_id");
}
1349
/// Each `impl Trait for Type` block becomes its own `impl` entity named `"Trait for Type"`.
#[test]
fn test_rust_impl_blocks_unique_names() {
    let source = r#"
trait Greeting {
 fn greet(&self) -> String;
}

struct Person;
struct Robot;
struct Cat;

impl Greeting for Person {
 fn greet(&self) -> String { "Hello".to_string() }
}

impl Greeting for Robot {
 fn greet(&self) -> String { "Beep".to_string() }
}

impl Greeting for Cat {
 fn greet(&self) -> String { "Meow".to_string() }
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "impls.rs");
    // Names of the `impl` entities only, in extraction order.
    let names: Vec<&str> = parsed
        .iter()
        .filter(|entity| entity.entity_type == "impl")
        .map(|entity| entity.name.as_str())
        .collect();

    assert_eq!(names.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
    for expected in ["Greeting for Person", "Greeting for Robot", "Greeting for Cat"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
1385
/// The top-level Go function `outer` is extracted from a file whose body holds only a local variable.
#[test]
fn test_nested_functions_go() {
    let source = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
    let parsed = CodeParserPlugin.extract_entities(source, "nested.go");

    let mut names: Vec<&str> = Vec::new();
    for entity in parsed.iter() {
        names.push(entity.name.as_str());
    }

    assert!(names.contains(&"outer"), "got: {:?}", names);
}
1396
/// Renaming a Python function while keeping its body leaves `structural_hash` unchanged
/// but changes `content_hash`.
#[test]
fn test_renamed_function_same_structural_hash() {
    let original_src = "def get_card():\n return db.query('cards')\n";
    let renamed_src = "def get_card_1():\n return db.query('cards')\n";

    let parser = CodeParserPlugin;
    let original = parser.extract_entities(original_src, "a.py");
    let renamed = parser.extract_entities(renamed_src, "b.py");

    assert_eq!(original.len(), 1, "Should find one entity in a");
    assert_eq!(renamed.len(), 1, "Should find one entity in b");

    // Indexing is safe only after the length checks above.
    let (before, after) = (&original[0], &renamed[0]);
    assert_eq!(before.name, "get_card");
    assert_eq!(after.name, "get_card_1");

    assert_eq!(
        before.structural_hash, after.structural_hash,
        "Renamed function with identical body should have same structural_hash"
    );

    assert_ne!(
        before.content_hash, after.content_hash,
        "Content hash should differ since raw content includes the name"
    );
}
1423
/// Two Swift `prefix operator` declarations that differ only in the operator token share a
/// `structural_hash` but have different `content_hash` values.
#[test]
fn test_swift_renamed_operator_same_structural_hash() {
    let parser = CodeParserPlugin;
    let tilde_entities = parser.extract_entities("prefix operator ~~~\n", "a.swift");
    let bang_entities = parser.extract_entities("prefix operator !!!\n", "b.swift");

    assert_eq!(tilde_entities.len(), 1, "Should find one entity in a");
    assert_eq!(bang_entities.len(), 1, "Should find one entity in b");

    // Indexing is safe only after the length checks above.
    let (tilde, bang) = (&tilde_entities[0], &bang_entities[0]);
    assert_eq!(tilde.name, "~~~");
    assert_eq!(bang.name, "!!!");
    assert_eq!(tilde.entity_type, "operator");
    assert_eq!(bang.entity_type, "operator");
    assert_eq!(
        tilde.structural_hash, bang.structural_hash,
        "Renamed operator with otherwise identical declaration should have same structural_hash"
    );
    assert_ne!(
        tilde.content_hash, bang.content_hash,
        "Content hash should differ since raw content includes the operator token"
    );
}
1445
/// Overloaded Swift `subscript` and `init` members get distinct IDs, each containing `@L`.
#[test]
fn test_swift_synthesized_names_disambiguate_overloads() {
    let source = r#"
struct Matrix {
 subscript(row: Int) -> Double {
 return Double(row)
 }

 subscript(row: Int, column: Int) -> Double {
 return Double(row + column)
 }
}

class Builder {
 init(value: Int) {}
 init(text: String) {}
}
"#;

    let parsed = CodeParserPlugin.extract_entities(source, "Overloads.swift");

    // The same three checks apply to both overloaded member kinds, subscripts first.
    for kind in ["subscript", "init"] {
        let ids: Vec<&str> = parsed
            .iter()
            .filter(|entity| entity.entity_type == kind)
            .map(|entity| entity.id.as_str())
            .collect();
        assert_eq!(ids.len(), 2);
        assert_ne!(ids[0], ids[1]);
        assert!(ids.iter().all(|id| id.contains("@L")));
    }
}
1486
/// HCL: top-level attributes and blocks are extracted, nested blocks get a dotted qualified
/// name and a parent, and attributes inside blocks are skipped.
#[test]
fn test_hcl_entity_extraction() {
    let source = r#"
region = "eu-west-1"

variable "image_id" {
 type = string
}

resource "aws_instance" "web" {
 ami = var.image_id

 lifecycle {
 create_before_destroy = true
 }
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "main.tf");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();
    let types: Vec<&str> = parsed.iter().map(|entity| entity.entity_type.as_str()).collect();
    // Diagnostic dump, visible with `--nocapture`.
    eprintln!(
        "HCL entities: {:?}",
        parsed
            .iter()
            .map(|e| (&e.name, &e.entity_type, &e.parent_id))
            .collect::<Vec<_>>()
    );

    // Entities that must be present.
    assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
    assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
    assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
    assert!(
        names.contains(&"resource.aws_instance.web.lifecycle"),
        "Should find nested lifecycle block with qualified name, got: {:?}",
        names
    );

    // Attributes nested in blocks must be absent.
    assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
    assert!(
        !names.contains(&"create_before_destroy"),
        "Should skip nested attributes inside nested blocks, got: {:?}",
        names
    );

    let lifecycle_has_parent = parsed
        .iter()
        .find(|entity| entity.name == "resource.aws_instance.web.lifecycle")
        .unwrap()
        .parent_id
        .is_some();
    assert!(lifecycle_has_parent, "lifecycle should be nested under resource");
    assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
}
1532
/// Kotlin classes, interfaces, objects, member functions and top-level functions are extracted.
#[test]
fn test_kotlin_entity_extraction() {
    let source = r#"
class UserService {
 val name: String = ""

 fun greet(): String {
 return "Hello, $name"
 }

 companion object {
 fun create(): UserService = UserService()
 }
}

interface Repository {
 fun findById(id: Int): Any?
}

object AppConfig {
 val version = "1.0"
}

fun topLevel(x: Int): Int = x * 2
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "App.kt");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();
    // Diagnostic dump, visible with `--nocapture`.
    eprintln!(
        "Kotlin entities: {:?}",
        parsed.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    let expected_names = [
        "UserService",
        "greet",
        "Repository",
        "findById",
        "AppConfig",
        "topLevel",
    ];
    for expected in expected_names {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
1569
/// XML elements `project`, `dependencies` and `build` are extracted from a POM-style document.
#[test]
fn test_xml_entity_extraction() {
    let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
 <groupId>com.example</groupId>
 <artifactId>my-app</artifactId>
 <dependencies>
 <dependency>
 <groupId>junit</groupId>
 <artifactId>junit</artifactId>
 </dependency>
 </dependencies>
 <build>
 <plugins>
 <plugin>
 <groupId>org.apache.maven</groupId>
 </plugin>
 </plugins>
 </build>
</project>
"#;
    let parsed = CodeParserPlugin.extract_entities(document, "pom.xml");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();
    // Diagnostic dump, visible with `--nocapture`.
    eprintln!(
        "XML entities: {:?}",
        parsed.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    for expected in ["project", "dependencies", "build"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
1599
/// Inside an arrow-function callback, classes and functions are still extracted, but
/// `const`/`let` locals (including those in a nested callback) are not.
#[test]
fn test_arrow_callback_scope_boundary_typescript() {
    let source = r#"
const activeQueues = [
 { queue: queues.fooQueue, processor: foo.process },
];

activeQueues.forEach((handler: any) => {
 const queue = handler.queue;
 let retries = 0;

 class QueueHandler {
 handle() { return queue; }
 }

 function createHandler() {
 return new QueueHandler();
 }

 queue.process((job) => {
 const orderId = job.data.orderId;
 return orderId;
 });
});

function handleFailure(job: any, err: any) {
 console.error('failed', err);
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "process.ts");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();
    // Names of entities that have no parent.
    let top_level: Vec<&str> = parsed
        .iter()
        .filter(|entity| entity.parent_id.is_none())
        .map(|entity| entity.name.as_str())
        .collect();

    // Module-scope declarations stay top-level.
    for expected in ["activeQueues", "handleFailure"] {
        assert!(top_level.contains(&expected), "got: {:?}", top_level);
    }

    // Declarations inside the callback are still discovered.
    for expected in ["QueueHandler", "handle", "createHandler"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }

    // Callback-local variables must not become entities.
    for local in ["queue", "retries", "orderId"] {
        assert!(!names.contains(&local), "got: {:?}", names);
    }
}
1655
/// A top-level wrapper function and the class and function declared inside it are all extracted.
#[test]
fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
    let source = r#"
function factory() {
 class Foo {
 method(): number {
 return 1;
 }
 }

 function bar(): Foo {
 return new Foo();
 }
}

factory();
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "wrapped.ts");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    let found_factory = names.iter().any(|&name| name == "factory");
    assert!(
        found_factory,
        "Should find top-level wrapper function, got: {:?}",
        names
    );

    let found_class = names.iter().any(|&name| name == "Foo");
    assert!(
        found_class,
        "Should find class inside top-level wrapper, got: {:?}",
        names
    );

    let found_inner_fn = names.iter().any(|&name| name == "bar");
    assert!(
        found_inner_fn,
        "Should find function inside top-level wrapper, got: {:?}",
        names
    );
}
1692
/// A class and a function declared inside a top-level arrow IIFE are extracted.
#[test]
fn test_top_level_iife_still_extracts_typescript_entities() {
    let source = r#"
(() => {
 class Foo {
 method(): number {
 return 1;
 }
 }

 function bar(): Foo {
 return new Foo();
 }
})();
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "iife.ts");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    let found_class = names.iter().any(|&name| name == "Foo");
    assert!(
        found_class,
        "Should find class inside top-level IIFE, got: {:?}",
        names
    );

    let found_fn = names.iter().any(|&name| name == "bar");
    assert!(
        found_fn,
        "Should find function inside top-level IIFE, got: {:?}",
        names
    );
}
1722
/// A default-exported function is extracted, but a `const` declared in its body is not.
#[test]
fn test_function_locals_not_extracted_as_nested_entities_typescript() {
    let source = r#"
export default function foo() {
 const x = 1;
 return x;
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "default-export.ts");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    let found_function = names.iter().any(|&name| name == "foo");
    assert!(
        found_function,
        "Should find exported function, got: {:?}",
        names
    );

    let found_local = names.iter().any(|&name| name == "x");
    assert!(
        !found_local,
        "Local inside function should not be extracted as an entity, got: {:?}",
        names
    );
}
1745
/// `const` bindings to function expressions are top-level `function` entities, a plain array
/// `const` is a `variable`, and locals inside function expressions are not extracted.
#[test]
fn test_function_expression_scope_boundary_typescript() {
    let source = r#"
const foo = function namedExpr(x: number) {
 const inner = x + 1;
 return inner;
};

const bar = function(y: number) {
 const local = y * 2;
 return local;
};

const items = [1, 2, 3];

items.forEach(function process(item) {
 const doubled = item * 2;
 console.log(doubled);
});
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "funexpr.ts");
    // Names of entities that have no parent.
    let top_level: Vec<&str> = parsed
        .iter()
        .filter(|entity| entity.parent_id.is_none())
        .map(|entity| entity.name.as_str())
        .collect();
    let all_names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    // The three module-scope bindings are top-level entities.
    for expected in ["foo", "bar", "items"] {
        assert!(top_level.contains(&expected), "got: {:?}", top_level);
    }

    // Function-valued consts are typed as functions; the array stays a variable.
    for (wanted, expected_type) in [("foo", "function"), ("bar", "function"), ("items", "variable")] {
        let entity = parsed.iter().find(|e| e.name == wanted).unwrap();
        assert_eq!(entity.entity_type, expected_type);
    }

    // Locals declared inside the function expressions never surface.
    for local in ["inner", "local", "doubled"] {
        assert!(!all_names.contains(&local), "got: {:?}", all_names);
    }

    assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
}
1795
/// A `const` bound to an arrow function is a `function` entity; the class, method and function
/// declared inside it are extracted, while its local `const` is not.
#[test]
fn test_variable_assigned_arrow_extracts_inner_entities() {
    let source = r#"
const handler = () => {
 class Inner {
 run() { return 1; }
 }

 function make() {
 return new Inner();
 }

 const local = 42;
};
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "assigned.ts");
    let handler_type = parsed
        .iter()
        .find(|entity| entity.name == "handler")
        .unwrap()
        .entity_type
        .as_str();
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    assert_eq!(handler_type, "function");
    for expected in ["handler", "Inner", "run", "make"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
    assert!(!names.contains(&"local"), "got: {:?}", names);
}
1825
/// A `const` bound to an anonymous function expression is a `function` entity; the class and
/// function declared inside it are extracted, while its local `const` is not.
#[test]
fn test_variable_assigned_function_expression_extracts_inner_entities() {
    let source = r#"
const handler = function() {
 class Inner {}
 function make() { return new Inner(); }
 const local = 42;
};
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "funexpr-inner.ts");
    let handler_type = parsed
        .iter()
        .find(|entity| entity.name == "handler")
        .unwrap()
        .entity_type
        .as_str();
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    assert_eq!(handler_type, "function");
    for expected in ["handler", "Inner", "make"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
    assert!(!names.contains(&"local"), "got: {:?}", names);
}
1847
/// A `let` binding to an arrow function keeps the entity type `variable` in TypeScript.
#[test]
fn test_let_assigned_arrow_stays_variable_typescript() {
    let source = r#"
let handler = () => {
 return 42;
};
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "let-assigned.ts");
    let handler_type = parsed
        .iter()
        .find(|entity| entity.name == "handler")
        .unwrap()
        .entity_type
        .as_str();

    assert_eq!(handler_type, "variable");
}
1861
/// A `const` binding to an arrow function has the entity type `function` in JavaScript.
#[test]
fn test_const_assigned_arrow_promoted_to_function_javascript() {
    let source = r#"
const handler = () => {
 return 42;
};
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "handler.js");
    let handler_type = parsed
        .iter()
        .find(|entity| entity.name == "handler")
        .unwrap()
        .entity_type
        .as_str();

    assert_eq!(handler_type, "function");
}
1875
/// In a multi-declarator `const`, each declarator is typed on its own: arrow-function
/// initialisers become `function`, everything else stays `variable`.
#[test]
fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
    let source = r#"
const value = 1, handler = () => value;
const first = () => 1, second = 2;
"#;
    let entities = CodeParserPlugin.extract_entities(source, "sample.ts");
    // Lookup by name that reports every extracted (name, type) pair when the name is absent.
    let find = |name: &str| {
        entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
            panic!(
                "missing {name}; got: {:?}",
                entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        })
    };

    let expectations = [
        ("value", "variable"),
        ("handler", "function"),
        ("first", "function"),
        ("second", "variable"),
    ];
    for (wanted, expected_type) in expectations {
        assert_eq!(find(wanted).entity_type, expected_type);
    }
}
1898
/// Inside a function body, a multi-declarator `const` does not yield entities for its plain or
/// class-valued declarators, yet the initialisers are still traversed for nested entities.
#[test]
fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
    let source = r#"
function wrapper() {
 const holder = class {
 run() { return 1; }
 }, handler = () => {
 class Inner {
 go() { return 2; }
 }
 }, value = 1;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "sample.ts");
    let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
    // Lookup by name that reports every extracted entity when the name is absent.
    let find = |name: &str| {
        entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
            panic!(
                "missing {name}; got: {:?}",
                entities
                    .iter()
                    .map(|e| (&e.name, &e.entity_type, &e.parent_id))
                    .collect::<Vec<_>>()
            )
        })
    };

    for wanted in ["wrapper", "handler"] {
        assert_eq!(find(wanted).entity_type, "function");
    }

    // Members reachable only through the declarator initialisers.
    for expected in ["run", "Inner", "go"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }

    // The suppressed declarators themselves.
    for suppressed in ["holder", "value"] {
        assert!(!names.contains(&suppressed), "got: {:?}", names);
    }
}
1935
/// A Go struct type, a package-level `var` with a composite literal, and a function are extracted.
#[test]
fn test_go_var_declaration() {
    let source = r#"package featuremgmt

type FeatureFlag struct {
 Name string
 Description string
 Stage string
}

var standardFeatureFlags = []FeatureFlag{
 {
 Name: "panelTitleSearch",
 Description: "Search for dashboards using panel title",
 Stage: "PublicPreview",
 },
}

func GetFlags() []FeatureFlag {
 return standardFeatureFlags
}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "flags.go");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    // Diagnostic dump of (name, type) pairs, visible with `--nocapture`.
    let name_type_pairs: Vec<(&str, &str)> = parsed
        .iter()
        .map(|entity| (entity.name.as_str(), entity.entity_type.as_str()))
        .collect();
    eprintln!("Go entities: {:?}", name_type_pairs);

    assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
    assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
    assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
}
1968
/// Grouped Go `var ( ... )` and `const ( ... )` declarations yield entities: at least one of the
/// grouped vars, the const `x`, and the function `main`.
#[test]
fn test_go_grouped_var_declaration() {
    let source = r#"package test

var (
 simple = 42
 flags = []string{"a", "b"}
)

const (
 x = 1
 y = 2
)

func main() {}
"#;
    let parsed = CodeParserPlugin.extract_entities(source, "test.go");
    let names: Vec<&str> = parsed.iter().map(|entity| entity.name.as_str()).collect();

    // Diagnostic dump of (name, type) pairs, visible with `--nocapture`.
    let name_type_pairs: Vec<(&str, &str)> = parsed
        .iter()
        .map(|entity| (entity.name.as_str(), entity.entity_type.as_str()))
        .collect();
    eprintln!("Go grouped entities: {:?}", name_type_pairs);

    // Either member of the var group is accepted here.
    let found_grouped_var = names.iter().any(|&name| name == "flags" || name == "simple");
    assert!(found_grouped_var, "Should find grouped var, got: {:?}", names);
    assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
    assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
}
1995
1996 #[test]
1997 fn test_dart_entity_extraction() {
1998 let code = r#"
1999import 'dart:math';
2000
2001class Calculator {
2002 final String name;
2003
2004 Calculator(this.name);
2005
2006 Calculator.withDefault() : name = 'default';
2007
2008 factory Calculator.create(String name) {
2009 return Calculator(name);
2010 }
2011
2012 int add(int a, int b) {
2013 return a + b;
2014 }
2015
2016 int get doubleAdd => add(1, 1) * 2;
2017
2018 set label(String value) {
2019 // no-op
2020 }
2021
2022 int operator +(Calculator other) {
2023 return 0;
2024 }
2025}
2026
2027mixin Loggable {
2028 void log(String message) {
2029 print(message);
2030 }
2031}
2032
2033extension StringExt on String {
2034 bool get isBlank => trim().isEmpty;
2035}
2036
2037enum Status {
2038 active,
2039 inactive;
2040
2041 String display() => name.toUpperCase();
2042}
2043
2044typedef Callback = void Function(int);
2045
2046int add(int a, int b) {
2047 return a + b;
2048}
2049
2050extension type Wrapper(int value) implements int {}
2051"#;
2052 let plugin = CodeParserPlugin;
2053 let entities = plugin.extract_entities(code, "calculator.dart");
2054 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2055 eprintln!(
2056 "Dart entities: {:?}",
2057 entities
2058 .iter()
2059 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2060 .collect::<Vec<_>>()
2061 );
2062
2063 assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
2065 assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
2066 assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
2067 assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
2068 assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
2069 assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
2070 assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
2071
2072 let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
2074 assert!(add_method.is_some(), "Should find add method inside Calculator");
2075 assert_eq!(add_method.unwrap().entity_type, "method");
2076
2077 let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
2079 assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
2080 let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
2081 assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
2082 assert_eq!(named_ctor.unwrap().entity_type, "constructor");
2083 assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
2084
2085 let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
2087 assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
2088 assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
2089
2090 let getter = entities.iter().find(|e| e.name == "doubleAdd");
2092 assert!(getter.is_some(), "Should find getter doubleAdd");
2093 assert_eq!(getter.unwrap().entity_type, "getter");
2094
2095 let setter = entities.iter().find(|e| e.name == "label");
2096 assert!(setter.is_some(), "Should find setter label");
2097 assert_eq!(setter.unwrap().entity_type, "setter");
2098
2099 let operator = entities.iter().find(|e| e.name == "operator +");
2100 assert!(operator.is_some(), "Should find operator +");
2101 assert_eq!(operator.unwrap().entity_type, "method");
2102
2103 let log_method = entities.iter().find(|e| e.name == "log");
2105 assert!(log_method.is_some(), "Should find log in Loggable");
2106 assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
2107
2108 let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
2110 assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
2111
2112 let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
2113 assert_eq!(loggable.entity_type, "mixin");
2114
2115 let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
2116 assert_eq!(ext.entity_type, "extension");
2117
2118 let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
2119 assert_eq!(wrapper.entity_type, "extension");
2120 }
2121
2122 #[test]
2123 #[cfg(feature = "lang-sql")]
2124 fn test_sql_entity_extraction() {
2125 let code = r#"
2126CREATE TABLE users (id INT PRIMARY KEY, name TEXT);
2127CREATE VIEW active_users AS SELECT * FROM users WHERE active;
2128CREATE FUNCTION add(a INT, b INT) RETURNS INT AS $$ BEGIN RETURN a + b; END; $$ LANGUAGE plpgsql;
2129CREATE INDEX idx_name ON users(name);
2130CREATE TYPE mood AS ENUM ('sad', 'happy');
2131CREATE SCHEMA myapp;
2132CREATE MATERIALIZED VIEW mv AS SELECT 1;
2133CREATE TABLE billing.invoices (id INT);
2134"#;
2135 let plugin = CodeParserPlugin;
2136 let entities = plugin.extract_entities(code, "schema.sql");
2137 let by_name = |n: &str| entities.iter().find(|e| e.name == n);
2138
2139 assert_eq!(by_name("users").map(|e| e.entity_type.as_str()), Some("table"));
2141 assert_eq!(by_name("active_users").map(|e| e.entity_type.as_str()), Some("view"));
2142 assert_eq!(by_name("add").map(|e| e.entity_type.as_str()), Some("function"));
2143 assert_eq!(by_name("mood").map(|e| e.entity_type.as_str()), Some("type"));
2144 assert_eq!(by_name("mv").map(|e| e.entity_type.as_str()), Some("view"));
2145 assert_eq!(
2146 by_name("billing.invoices").map(|e| e.entity_type.as_str()),
2147 Some("table"),
2148 "schema-qualified table name should be preserved"
2149 );
2150
2151 assert_eq!(
2153 by_name("idx_name").map(|e| e.entity_type.as_str()),
2154 Some("index"),
2155 "index should be named idx_name, not the table it indexes"
2156 );
2157 assert_eq!(by_name("myapp").map(|e| e.entity_type.as_str()), Some("schema"));
2158 }
2159
2160 #[test]
2161 fn test_dart_top_level_function_includes_body() {
2162 let code = r#"
2163int add(int a, int b) {
2164 return a + b;
2165}
2166
2167String greet(String name) => 'Hello, $name!';
2168"#;
2169 let plugin = CodeParserPlugin;
2170 let entities = plugin.extract_entities(code, "funcs.dart");
2171 eprintln!(
2172 "Dart top-level: {:?}",
2173 entities
2174 .iter()
2175 .map(|e| (&e.name, &e.entity_type, &e.content))
2176 .collect::<Vec<_>>()
2177 );
2178
2179 let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
2180 assert!(
2181 add_fn.content.contains("return a + b"),
2182 "Top-level function content should include the body, got: {:?}",
2183 add_fn.content
2184 );
2185
2186 let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
2187 assert!(
2188 greet_fn.content.contains("Hello"),
2189 "Expression body should be included, got: {:?}",
2190 greet_fn.content
2191 );
2192
2193 let code_v2 = r#"
2195int add(int a, int b) {
2196 return a * b;
2197}
2198
2199String greet(String name) => 'Hello, $name!';
2200"#;
2201 let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
2202 let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
2203 assert_ne!(
2204 add_fn.content_hash, add_v2.content_hash,
2205 "Body change should produce different content_hash"
2206 );
2207
2208 let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
2210 assert_eq!(
2211 greet_fn.content_hash, greet_v2.content_hash,
2212 "Unchanged function should keep the same content_hash"
2213 );
2214 }
2215
2216 #[test]
2217 fn test_dart_renamed_named_constructor_same_structural_hash() {
2218 let code_a = r#"
2219class Foo {
2220 Foo.fromJson(Map<String, dynamic> json) {
2221 print(json);
2222 }
2223}
2224"#;
2225 let code_b = r#"
2226class Foo {
2227 Foo.fromMap(Map<String, dynamic> json) {
2228 print(json);
2229 }
2230}
2231"#;
2232 let plugin = CodeParserPlugin;
2233 let entities_a = plugin.extract_entities(code_a, "a.dart");
2234 let entities_b = plugin.extract_entities(code_b, "b.dart");
2235
2236 let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
2237 let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
2238
2239 assert_eq!(
2240 ctor_a.structural_hash, ctor_b.structural_hash,
2241 "Renamed named constructor with identical body should have same structural_hash"
2242 );
2243 assert_ne!(
2244 ctor_a.content_hash, ctor_b.content_hash,
2245 "Content hash should differ since raw content includes the name"
2246 );
2247 }
2248
2249 #[test]
2250 fn test_dart_top_level_getter_setter() {
2251 let code = r#"
2252int _value = 0;
2253
2254int get currentValue {
2255 return _value;
2256}
2257
2258set currentValue(int v) {
2259 _value = v;
2260}
2261"#;
2262 let plugin = CodeParserPlugin;
2263 let entities = plugin.extract_entities(code, "accessors.dart");
2264 eprintln!(
2265 "Dart top-level accessors: {:?}",
2266 entities
2267 .iter()
2268 .map(|e| (&e.name, &e.entity_type, &e.content))
2269 .collect::<Vec<_>>()
2270 );
2271
2272 let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
2273 assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
2274 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2275 assert!(
2276 getter.unwrap().content.contains("return _value"),
2277 "Top-level getter content should include the body"
2278 );
2279 assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
2280
2281 let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
2285 assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
2286 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2287 assert!(
2288 setter.unwrap().content.contains("_value = v"),
2289 "Top-level setter content should include the body"
2290 );
2291 }
2292
2293 #[test]
2294 fn test_dart_field_entity_type() {
2295 let code = r#"
2296class Config {
2297 final String name;
2298 static const int maxRetries = 3;
2299}
2300"#;
2301 let plugin = CodeParserPlugin;
2302 let entities = plugin.extract_entities(code, "config.dart");
2303 eprintln!(
2304 "Dart fields: {:?}",
2305 entities
2306 .iter()
2307 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2308 .collect::<Vec<_>>()
2309 );
2310
2311 let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
2312 assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
2313 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2314 assert_eq!(name_field.unwrap().entity_type, "field");
2315
2316 let max_retries = entities.iter().find(|e| e.name == "maxRetries");
2317 assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
2318 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2319 assert_eq!(max_retries.unwrap().entity_type, "field");
2320 }
2321
2322 #[test]
2323 fn test_dart_identifier_list_fields() {
2324 let code = r#"
2328abstract class Shape {
2329 abstract double x, y;
2330 abstract String label;
2331}
2332"#;
2333 let plugin = CodeParserPlugin;
2334 let entities = plugin.extract_entities(code, "shape.dart");
2335 eprintln!(
2336 "Dart identifier_list fields: {:?}",
2337 entities
2338 .iter()
2339 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2340 .collect::<Vec<_>>()
2341 );
2342
2343 let x_field = entities.iter().find(|e| e.name == "x");
2344 assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
2345 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2346 assert_eq!(x_field.unwrap().entity_type, "field");
2347 assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
2348
2349 let label_field = entities.iter().find(|e| e.name == "label");
2350 assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
2351 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2352 assert_eq!(label_field.unwrap().entity_type, "field");
2353 }
2354
2355 #[test]
2356 fn test_ocaml_entity_extraction() {
2357 let code = r#"
2358type color = Red | Green | Blue
2359
2360type point = {
2361 x : float;
2362 y : float;
2363}
2364
2365exception Not_found of string
2366
2367let greet name =
2368 Printf.printf "Hello, %s!\n" name
2369
2370let add a b = a + b
2371
2372let version = "1.0"
2373
2374let color_to_string = function
2375 | Red -> "red"
2376 | Blue -> "blue"
2377
2378let inc = fun x -> x + 1
2379
2380module MyModule = struct
2381 let helper x = x * 2
2382end
2383
2384module type Printable = sig
2385 val to_string : 'a -> string
2386end
2387
2388external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
2389
2390class point_class x_init = object
2391 val mutable x = x_init
2392 method get_x = x
2393end
2394
2395class type measurable = object
2396 method measure : float
2397end
2398"#;
2399 let plugin = CodeParserPlugin;
2400 let entities = plugin.extract_entities(code, "example.ml");
2401 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2402 eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2403
2404 let find = |name: &str| entities.iter().find(|e| e.name == name)
2405 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2406
2407 assert_eq!(find("color").entity_type, "type");
2408 assert_eq!(find("point").entity_type, "type");
2409 assert_eq!(find("Not_found").entity_type, "exception");
2410 assert_eq!(find("greet").entity_type, "function");
2411 assert_eq!(find("add").entity_type, "function");
2412 assert_eq!(find("version").entity_type, "value");
2413 assert_eq!(find("color_to_string").entity_type, "function");
2414 assert_eq!(find("inc").entity_type, "function");
2415 assert_eq!(find("MyModule").entity_type, "module");
2416 assert_eq!(find("Printable").entity_type, "module_type");
2417 assert_eq!(find("caml_input").entity_type, "external");
2418 assert_eq!(find("point_class").entity_type, "class");
2419 assert_eq!(find("measurable").entity_type, "class_type");
2420 }
2421
2422 #[test]
2423 fn test_ocaml_nested_module_entities() {
2424 let code = r#"
2425module Outer = struct
2426 let x = 42
2427
2428 module Inner = struct
2429 let y = 0
2430 end
2431end
2432"#;
2433 let plugin = CodeParserPlugin;
2434 let entities = plugin.extract_entities(code, "nested.ml");
2435 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2436 eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2437
2438 let find = |name: &str| entities.iter().find(|e| e.name == name)
2439 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2440
2441 let outer = find("Outer");
2442 let x = find("x");
2443 let inner = find("Inner");
2444 let y = find("y");
2445
2446 assert_eq!(outer.entity_type, "module");
2447 assert_eq!(x.entity_type, "value");
2448 assert_eq!(inner.entity_type, "module");
2449 assert_eq!(y.entity_type, "value");
2450
2451 assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
2452 assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
2453 assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
2454 }
2455
2456 #[test]
2457 fn test_ocaml_interface_entity_extraction() {
2458 let code = r#"
2459type t
2460
2461val create : string -> t
2462val to_string : t -> string
2463
2464exception Invalid_input of string
2465
2466module type Serializable = sig
2467 val serialize : t -> string
2468end
2469"#;
2470 let plugin = CodeParserPlugin;
2471 let entities = plugin.extract_entities(code, "example.mli");
2472 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2473 eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2474
2475 let find = |name: &str| entities.iter().find(|e| e.name == name)
2476 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2477
2478 assert_eq!(find("t").entity_type, "type");
2479 assert_eq!(find("create").entity_type, "val");
2480 assert_eq!(find("to_string").entity_type, "val");
2481 assert_eq!(find("Invalid_input").entity_type, "exception");
2482 assert_eq!(find("Serializable").entity_type, "module_type");
2483 }
2484
2485 #[test]
2486 fn test_ocaml_mutual_recursion_let() {
2487 let code = r#"
2488let rec even n = (n = 0) || odd (n - 1)
2489and odd n = (n <> 0) && even (n - 1)
2490
2491let rec ping x = pong (x - 1)
2492and pong x = if x <= 0 then 0 else ping (x - 1)
2493"#;
2494 let plugin = CodeParserPlugin;
2495 let entities = plugin.extract_entities(code, "mutual.ml");
2496 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2497 eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2498
2499 let find = |name: &str| entities.iter().find(|e| e.name == name)
2500 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2501
2502 assert_eq!(find("even").entity_type, "function");
2503 assert_eq!(find("odd").entity_type, "function");
2504 assert_eq!(find("ping").entity_type, "function");
2505 assert_eq!(find("pong").entity_type, "function");
2506 }
2507
2508 #[test]
2509 fn test_ocaml_mutual_recursion_module() {
2510 let code = r#"
2511module rec A : sig val x : int end = struct
2512 let x = B.y + 1
2513end
2514and B : sig val y : int end = struct
2515 let y = 0
2516end
2517"#;
2518 let plugin = CodeParserPlugin;
2519 let entities = plugin.extract_entities(code, "mutual_mod.ml");
2520 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2521 eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2522
2523 let find = |name: &str| entities.iter().find(|e| e.name == name)
2524 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2525
2526 let a = find("A");
2527 let b = find("B");
2528 assert_eq!(a.entity_type, "module");
2529 assert_eq!(b.entity_type, "module");
2530
2531 let x = find("x");
2532 let y = find("y");
2533 assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
2534 assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
2535 }
2536
2537 #[test]
2538 fn test_ocaml_destructured_let() {
2539 let code = r#"
2540let (a, b) = (1, 2)
2541
2542let { x; y } = point
2543
2544let simple = 42
2545"#;
2546 let plugin = CodeParserPlugin;
2547 let entities = plugin.extract_entities(code, "destruct.ml");
2548 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2549 eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2550
2551 let find = |name: &str| entities.iter().find(|e| e.name == name)
2552 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2553
2554 assert_eq!(find("a").entity_type, "value");
2555 assert_eq!(find("b").entity_type, "value");
2556 assert_eq!(find("x").entity_type, "value");
2557 assert_eq!(find("y").entity_type, "value");
2558 assert_eq!(find("simple").entity_type, "value");
2559 }
2560
2561 #[test]
2562 fn test_ocaml_mutual_recursion_class() {
2563 let code = r#"
2564class foo = object
2565 method x = 1
2566end
2567and bar = object
2568 method y = 2
2569end
2570"#;
2571 let plugin = CodeParserPlugin;
2572 let entities = plugin.extract_entities(code, "classes.ml");
2573 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2574 eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2575
2576 let find = |name: &str| entities.iter().find(|e| e.name == name)
2577 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2578
2579 assert_eq!(find("foo").entity_type, "class");
2580 assert_eq!(find("bar").entity_type, "class");
2581 }
2582
2583 #[test]
2584 fn test_perl_entity_extraction() {
2585 let code = r#"package Foo::Bar;
2586
2587use strict;
2588use warnings;
2589
2590sub hello {
2591 my ($self, $name) = @_;
2592 print "Hello, $name!\n";
2593}
2594
2595sub _private_helper {
2596 return 42;
2597}
2598
25991;
2600"#;
2601 let plugin = CodeParserPlugin;
2602 let entities = plugin.extract_entities(code, "Foo/Bar.pm");
2603 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2604
2605 assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
2606 assert!(names.contains(&"hello"), "got: {:?}", names);
2607 assert!(names.contains(&"_private_helper"), "got: {:?}", names);
2608
2609 let find = |name: &str| entities.iter().find(|e| e.name == name)
2610 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2611
2612 assert_eq!(find("Foo::Bar").entity_type, "package");
2613 assert_eq!(find("hello").entity_type, "function");
2614 assert_eq!(find("_private_helper").entity_type, "function");
2615 }
2616
2617 #[test]
2618 fn test_fortran_entity_extraction() {
2619 let code = r#"module math_utils
2620 implicit none
2621contains
2622 function add(a, b) result(c)
2623 integer, intent(in) :: a, b
2624 integer :: c
2625 c = a + b
2626 end function add
2627
2628 subroutine greet()
2629 print *, "hello"
2630 end subroutine greet
2631end module math_utils
2632
2633program main
2634 implicit none
2635 print *, "hello"
2636end program main
2637"#;
2638 let plugin = CodeParserPlugin;
2639 let entities = plugin.extract_entities(code, "test.f90");
2640 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2641
2642 assert!(names.contains(&"math_utils"), "got: {:?}", names);
2643 assert!(names.contains(&"add"), "got: {:?}", names);
2644 assert!(names.contains(&"greet"), "got: {:?}", names);
2645 assert!(names.contains(&"main"), "got: {:?}", names);
2646
2647 let find = |name: &str| entities.iter().find(|e| e.name == name)
2648 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2649
2650 assert_eq!(find("math_utils").entity_type, "module");
2651 assert_eq!(find("add").entity_type, "function");
2652 assert_eq!(find("greet").entity_type, "subroutine");
2653 assert_eq!(find("main").entity_type, "program");
2654
2655 assert!(find("add").parent_id.is_some());
2657 assert!(find("greet").parent_id.is_some());
2658 }
2659
2660 #[test]
2661 fn test_scala_entity_extraction() {
2662 let code = r#"
2663package com.example
2664
2665import scala.collection.mutable
2666
2667class UserService(val name: String) {
2668 def getUsers(): List[User] = db.findAll()
2669
2670 def createUser(user: User): Unit = db.save(user)
2671
2672 private def validate(user: User): Boolean = true
2673}
2674
2675object UserService {
2676 def apply(name: String): UserService = new UserService(name)
2677
2678 val DefaultName: String = "default"
2679}
2680
2681trait Repository[T] {
2682 def findById(id: String): Option[T]
2683 def findAll(): List[T]
2684}
2685
2686case class User(id: String, name: String)
2687
2688type UserId = String
2689"#;
2690 let plugin = CodeParserPlugin;
2691 let entities = plugin.extract_entities(code, "UserService.scala");
2692 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2693 eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2694
2695 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
2696 assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
2697 assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
2698 assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
2699
2700 let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
2702 assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
2703 }
2704
2705 #[test]
2706 fn test_scala3_entity_extraction() {
2707 let code = r#"
2708package com.example
2709
2710enum Color:
2711 case Red, Green, Blue
2712
2713enum Planet(mass: Double, radius: Double):
2714 case Mercury extends Planet(3.303e+23, 2.4397e6)
2715 case Venus extends Planet(4.869e+24, 6.0518e6)
2716
2717object Main:
2718 def main(args: Array[String]): Unit =
2719 println("Hello, World!")
2720
2721trait Greeter:
2722 def greet(name: String): String
2723
2724given Greeter with
2725 def greet(name: String): String = s"Hello, $name!"
2726
2727extension (s: String)
2728 def shout: String = s.toUpperCase + "!"
2729
2730type Predicate[A] = A => Boolean
2731"#;
2732 let plugin = CodeParserPlugin;
2733 let entities = plugin.extract_entities(code, "Main.scala");
2734 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2735 eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2736
2737 assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
2738 assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
2739 assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
2740 assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
2741 assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
2742 }
2743
2744 #[test]
2745 fn test_zig_entity_extraction() {
2746 let code = r#"
2747const std = @import("std");
2748
2749pub const Point = struct {
2750 x: i32,
2751 y: i32,
2752};
2753
2754pub const Color = enum {
2755 red,
2756 green,
2757 blue,
2758};
2759
2760const Person = struct {
2761 name: []const u8,
2762 age: u32,
2763};
2764
2765pub fn greet(name: []const u8) void {
2766 std.debug.print("Hello, {s}!\n", .{name});
2767}
2768
2769fn add(a: i32, b: i32) i32 {
2770 return a + b;
2771}
2772
2773pub fn main() !void {
2774 greet("world");
2775}
2776
2777test "basic addition" {
2778 const result = add(2, 3);
2779 _ = result;
2780}
2781"#;
2782 let plugin = CodeParserPlugin;
2783 let entities = plugin.extract_entities(code, "main.zig");
2784 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2785 let types: std::collections::HashMap<&str, &str> = entities
2786 .iter()
2787 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2788 .collect();
2789
2790 assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
2791 assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
2792 assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
2793 assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
2794 assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
2795 assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
2796
2797 assert_eq!(types["greet"], "function");
2798 assert_eq!(types["add"], "function");
2799 assert_eq!(types["Point"], "struct");
2800 assert_eq!(types["Color"], "enum");
2801 assert_eq!(types["Person"], "struct");
2802 }
2803
2804 #[test]
2805 #[cfg(feature = "lang-edn")]
2806 fn test_edn_deps_edn_map_entries() {
2807 let code = r#"{:deps {org.clojure/clojure {:mvn/version "1.11.0"}}
2808 :paths ["src" "resources"]
2809 :aliases {:dev {:extra-deps {cider/cider-nrepl {:mvn/version "0.28.5"}}}}}"#;
2810 let plugin = CodeParserPlugin;
2811 let entities = plugin.extract_entities(code, "deps.edn");
2812 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2813 let types: std::collections::HashMap<&str, &str> = entities
2814 .iter()
2815 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2816 .collect();
2817
2818 assert!(names.contains(&":deps"), "Should find :deps, got: {:?}", names);
2819 assert!(names.contains(&":paths"), "Should find :paths, got: {:?}", names);
2820 assert!(names.contains(&":aliases"), "Should find :aliases, got: {:?}", names);
2821 assert_eq!(names.len(), 3, "Should have exactly 3 entries, got: {:?}", names);
2822 assert_eq!(types[":deps"], "entry");
2823 assert_eq!(types[":paths"], "entry");
2824 assert_eq!(types[":aliases"], "entry");
2825 }
2826
2827 #[test]
2828 #[cfg(feature = "lang-edn")]
2829 fn test_edn_nested_map_values_not_extracted() {
2830 let code = r#"{:a {:b 1 :c 2} :d 3}"#;
2832 let plugin = CodeParserPlugin;
2833 let entities = plugin.extract_entities(code, "config.edn");
2834 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2835
2836 assert!(names.contains(&":a"), "Should find :a, got: {:?}", names);
2837 assert!(names.contains(&":d"), "Should find :d, got: {:?}", names);
2838 assert!(!names.contains(&":b"), "Inner :b should not be extracted");
2839 assert!(!names.contains(&":c"), "Inner :c should not be extracted");
2840 assert_eq!(names.len(), 2);
2841 }
2842
2843 #[test]
2844 #[cfg(feature = "lang-edn")]
2845 fn test_edn_non_map_top_level_forms_not_extracted() {
2846 let code = r#"["alpha" "beta"]"#;
2848 let plugin = CodeParserPlugin;
2849 let entities = plugin.extract_entities(code, "data.edn");
2850 assert_eq!(entities.len(), 0);
2851 }
2852
2853 #[test]
2854 #[cfg(feature = "lang-edn")]
2855 fn test_edn_symbol_keys_extracted() {
2856 let code = r#"{foo 1 bar 2}"#;
2857 let plugin = CodeParserPlugin;
2858 let entities = plugin.extract_entities(code, "sym.edn");
2859 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2860
2861 assert!(names.contains(&"foo"), "Should find foo, got: {:?}", names);
2862 assert!(names.contains(&"bar"), "Should find bar, got: {:?}", names);
2863 }
2864}