1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use crate::utils::hash::{content_hash, structural_hash};
10use languages::{get_all_code_extensions, get_language_config};
11use entity_extractor::extract_entities;
12
/// Parser plugin for source-code files.
///
/// It carries no state; all behaviour lives in its `SemanticParserPlugin`
/// implementation in this file, which registers it under the id `"code"`.
pub struct CodeParserPlugin;
14
thread_local! {
    // Per-thread cache of tree-sitter parsers, keyed by the language config id.
    // `parse_tree` looks parsers up here so that a parser for a given language
    // is constructed on first use and reused for later parses on the same thread.
    static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
}
20
21fn language_config_for_content(
22 content: &str,
23 file_path: &str,
24) -> Option<&'static languages::LanguageConfig> {
25 let ext = std::path::Path::new(file_path)
26 .extension()
27 .and_then(|e| e.to_str())
28 .map(|e| format!(".{}", e.to_lowercase()))
29 .unwrap_or_default();
30
31 get_language_config(&ext).or_else(|| {
32 detect_ext_from_content(content).and_then(|shebang_ext| get_language_config(&shebang_ext))
33 })
34}
35
36fn parse_tree(
37 config: &'static languages::LanguageConfig,
38 content: &str,
39) -> Option<tree_sitter::Tree> {
40 let language = (config.get_language)()?;
41
42 PARSER_CACHE.with(|cache| {
43 let mut cache = cache.borrow_mut();
44 let parser = cache.entry(config.id).or_insert_with(|| {
45 let mut p = tree_sitter::Parser::new();
46 let _ = p.set_language(&language);
47 p
48 });
49
50 parser.parse(content.as_bytes(), None)
51 })
52}
53
54fn has_non_comment_content(node: tree_sitter::Node, source: &[u8]) -> bool {
55 let mut worklist = Vec::new();
56 let mut cursor = node.walk();
57 worklist.extend(node.children(&mut cursor));
58
59 while let Some(node) = worklist.pop() {
60 if is_comment_node(node.kind()) {
61 continue;
62 }
63
64 if node.child_count() == 0 {
65 let start = node.start_byte();
66 let end = node.end_byte();
67 if start < end
68 && end <= source.len()
69 && source[start..end].iter().any(|b| !b.is_ascii_whitespace())
70 {
71 return true;
72 }
73 continue;
74 }
75
76 let mut cursor = node.walk();
77 worklist.extend(node.children(&mut cursor));
78 }
79
80 false
81}
82
/// Returns `true` when `kind` is one of the node kind names this module
/// treats as a comment.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];

    COMMENT_KINDS.contains(&kind)
}
89
/// Returns the text following a leading `#!` up to the end of the first line.
///
/// Yields `None` when `content` does not start with `#!`, and `Some("")` when
/// nothing follows the marker on the first line.
fn shebang_line(content: &str) -> Option<&str> {
    let after_marker = content.strip_prefix("#!")?;
    let first_line = after_marker.lines().next();
    Some(first_line.unwrap_or_default())
}
95
96impl SemanticParserPlugin for CodeParserPlugin {
97 fn id(&self) -> &str {
98 "code"
99 }
100
101 fn extensions(&self) -> &[&str] {
102 get_all_code_extensions()
103 }
104
105 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
106 self.extract_entities_with_tree(content, file_path).0
107 }
108
109 fn extract_entities_with_tree(
110 &self,
111 content: &str,
112 file_path: &str,
113 ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
114 let Some(config) = language_config_for_content(content, file_path) else {
115 return (Vec::new(), None);
116 };
117
118 let Some(tree) = parse_tree(config, content) else {
119 return (Vec::new(), None);
120 };
121
122 let entities = extract_entities(&tree, file_path, config, content);
123 (entities, Some(tree))
124 }
125
126 fn structural_hash_content(&self, content: &str, file_path: &str) -> Option<String> {
127 let config = language_config_for_content(content, file_path)?;
128 let tree = parse_tree(config, content)?;
129 let shebang = shebang_line(content);
130 if shebang.is_none() && !has_non_comment_content(tree.root_node(), content.as_bytes()) {
131 return Some(String::new());
132 }
133 let structural = structural_hash(tree.root_node(), content.as_bytes());
134 match shebang {
135 Some(shebang) => Some(content_hash(&format!("shebang:{shebang}\n{structural}"))),
136 None => Some(structural),
137 }
138 }
139}
140
141use crate::parser::registry::detect_ext_from_content;
142
143#[cfg(test)]
144mod tests {
145 use super::*;
146
    /// Extracts entities from a Java file declaring a class, a generic
    /// interface and an enum, and checks that all three type names are found.
    #[test]
    fn test_java_entity_extraction() {
        let code = r#"
package com.example;

import java.util.List;

public class UserService {
 private String name;

 public UserService(String name) {
 this.name = name;
 }

 public List<User> getUsers() {
 return db.findAll();
 }

 public void createUser(User user) {
 db.save(user);
 }
}

interface Repository<T> {
 T findById(String id);
 List<T> findAll();
}

enum Status {
 ACTIVE,
 INACTIVE,
 DELETED
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "UserService.java");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
        // Diagnostic dump of (name, type) pairs on stderr.
        eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());

        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
        assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
    }
191
    /// Checks that methods declared inside a Java class are extracted
    /// alongside the class, and that a method entity records a parent id.
    #[test]
    fn test_java_nested_methods() {
        let code = r#"
public class Calculator {
 public int add(int a, int b) {
 return a + b;
 }

 public int subtract(int a, int b) {
 return a - b;
 }
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "Calculator.java");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        // Diagnostic dump of (name, type, parent id) triples on stderr.
        eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());

        assert!(names.contains(&"Calculator"), "Should find Calculator class");
        assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
        assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);

        // The presence of `add` was asserted above, so this lookup cannot fail.
        let add = entities.iter().find(|e| e.name == "add").unwrap();
        assert!(add.parent_id.is_some(), "add should have parent_id");
    }
218
    /// Extracts entities from a C file and checks that its three functions,
    /// the named struct and the named enum are all found.
    #[test]
    fn test_c_entity_extraction() {
        let code = r#"
#include <stdio.h>

struct Point {
 int x;
 int y;
};

enum Color {
 RED,
 GREEN,
 BLUE
};

typedef struct {
 char name[50];
 int age;
} Person;

void greet(const char* name) {
 printf("Hello, %s!\n", name);
}

int add(int a, int b) {
 return a + b;
}

int main() {
 greet("world");
 return 0;
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "main.c");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
        // Diagnostic dump of (name, type) pairs on stderr.
        eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());

        assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
        assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
        assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
        assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
        assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
    }
265
266 #[test]
267 fn test_c_function_locals_not_extracted() {
268 let code = r#"
269int global_count = 0;
270int helper(void);
271
272int main(void) {
273 int local = helper();
274 const char *message = "hello";
275 return local + global_count;
276}
277"#;
278 let plugin = CodeParserPlugin;
279 let entities = plugin.extract_entities(code, "main.c");
280 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
281
282 assert!(names.contains(&"global_count"), "got: {:?}", names);
283 assert!(names.contains(&"helper"), "got: {:?}", names);
284 assert!(names.contains(&"main"), "got: {:?}", names);
285 assert!(!names.contains(&"local"), "got: {:?}", names);
286 assert!(!names.contains(&"message"), "got: {:?}", names);
287 }
288
289 #[test]
290 fn test_cpp_entity_extraction() {
291 let code = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
292 let plugin = CodeParserPlugin;
293 let entities = plugin.extract_entities(code, "main.cpp");
294 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
295 assert!(names.contains(&"math"), "got: {:?}", names);
296 assert!(names.contains(&"Vector3"), "got: {:?}", names);
297 assert!(names.contains(&"greet"), "got: {:?}", names);
298 }
299
300 #[test]
301 fn test_cpp_function_locals_not_extracted() {
302 let code = r#"
303int global_value = 1;
304int helper();
305
306int main() {
307 int local = helper();
308 auto lambda = []() {
309 int lambda_local = 3;
310 return lambda_local;
311 };
312 return local + lambda();
313}
314"#;
315 let plugin = CodeParserPlugin;
316 let entities = plugin.extract_entities(code, "main.cpp");
317 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
318
319 assert!(names.contains(&"global_value"), "got: {:?}", names);
320 assert!(names.contains(&"helper"), "got: {:?}", names);
321 assert!(names.contains(&"main"), "got: {:?}", names);
322 assert!(!names.contains(&"local"), "got: {:?}", names);
323 assert!(!names.contains(&"lambda"), "got: {:?}", names);
324 assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
325 }
326
327 #[test]
328 fn test_ruby_entity_extraction() {
329 let code = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
330 let plugin = CodeParserPlugin;
331 let entities = plugin.extract_entities(code, "auth.rb");
332 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
333 assert!(names.contains(&"Auth"), "got: {:?}", names);
334 assert!(names.contains(&"User"), "got: {:?}", names);
335 assert!(names.contains(&"helper"), "got: {:?}", names);
336 }
337
338 #[test]
339 fn test_csharp_entity_extraction() {
340 let code = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
341 let plugin = CodeParserPlugin;
342 let entities = plugin.extract_entities(code, "Models.cs");
343 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
344 assert!(names.contains(&"MyApp"), "got: {:?}", names);
345 assert!(names.contains(&"User"), "got: {:?}", names);
346 assert!(names.contains(&"Role"), "got: {:?}", names);
347 }
348
    /// End-to-end check of Swift extraction: verifies which declarations are
    /// found, the entity type assigned to each, the parent of nested members,
    /// and the exact ids generated for nested members.
    #[test]
    fn test_swift_entity_extraction() {
        let code = r#"
import Foundation

typealias Handler = (Int) -> Void

prefix operator ~~~

class UserService {
 var name: String

 init(name: String) {
 self.name = name
 }

 deinit {
 print("freed")
 }

 func getUsers() -> [User] {
 return db.findAll()
 }
}

struct Point {
 var x: Double
 var y: Double

 subscript(index: Int) -> Double {
 return x + y + Double(index)
 }
}

enum Status {
 case active
 case inactive
 case deleted
}

protocol Repository {
 associatedtype Canvas
 func findById(id: String) -> Canvas?
 func findAll() -> [Canvas]
}

func helper(x: Int) -> Int {
 return x * 2
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "UserService.swift");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        // Diagnostic dump of (name, type) pairs on stderr.
        eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());

        // Presence checks first, so the `unwrap()` lookups below cannot fail.
        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
        assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
        assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
        assert!(names.contains(&"Canvas"), "Should find associatedtype Canvas, got: {:?}", names);
        assert!(names.contains(&"Handler"), "Should find typealias Handler, got: {:?}", names);
        assert!(names.contains(&"~~~"), "Should find custom operator ~~~, got: {:?}", names);
        assert!(names.contains(&"init"), "Should find initializer init, got: {:?}", names);
        assert!(names.contains(&"deinit"), "Should find deinitializer deinit, got: {:?}", names);
        assert!(names.contains(&"subscript"), "Should find subscript, got: {:?}", names);
        assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);

        // Top-level declarations carry no parent.
        let handler = entities.iter().find(|e| e.name == "Handler").unwrap();
        assert_eq!(handler.entity_type, "type");
        assert!(handler.parent_id.is_none());

        let operator = entities.iter().find(|e| e.name == "~~~").unwrap();
        assert_eq!(operator.entity_type, "operator");
        assert!(operator.parent_id.is_none());

        let user_service = entities.iter().find(|e| e.name == "UserService").unwrap();
        assert_eq!(user_service.entity_type, "class");

        // Members of the class point at it and embed it in their id.
        let initializer = entities.iter().find(|e| e.name == "init").unwrap();
        assert_eq!(initializer.entity_type, "init");
        assert_eq!(initializer.parent_id.as_deref(), Some(user_service.id.as_str()));
        assert_eq!(initializer.id, "UserService.swift::class::UserService::init");

        let deinitializer = entities.iter().find(|e| e.name == "deinit").unwrap();
        assert_eq!(deinitializer.entity_type, "deinit");
        assert_eq!(deinitializer.parent_id.as_deref(), Some(user_service.id.as_str()));
        assert_eq!(
            deinitializer.id,
            "UserService.swift::class::UserService::deinit"
        );

        let point = entities.iter().find(|e| e.name == "Point").unwrap();
        assert_eq!(point.entity_type, "struct");

        let subscript = entities.iter().find(|e| e.name == "subscript").unwrap();
        assert_eq!(subscript.entity_type, "subscript");
        assert_eq!(subscript.parent_id.as_deref(), Some(point.id.as_str()));
        assert_eq!(
            subscript.id,
            "UserService.swift::struct::Point::subscript"
        );

        let status = entities.iter().find(|e| e.name == "Status").unwrap();
        assert_eq!(status.entity_type, "enum");

        let repository = entities.iter().find(|e| e.name == "Repository").unwrap();
        assert_eq!(repository.entity_type, "protocol");
        assert_eq!(repository.id, "UserService.swift::protocol::Repository");

        let canvas = entities.iter().find(|e| e.name == "Canvas").unwrap();
        assert_eq!(canvas.entity_type, "associatedtype");
        assert_eq!(canvas.parent_id.as_deref(), Some(repository.id.as_str()));
        assert_eq!(
            canvas.id,
            "UserService.swift::protocol::Repository::Canvas"
        );
    }
466
    /// A Swift `var x, y: Int` declaration must yield one property entity per
    /// binding, each parented to the struct and each with its own
    /// single-binding content.
    #[test]
    fn test_swift_multi_binding_property_extraction() {
        let code = r#"
struct Point {
 var x, y: Int
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "Point.swift");
        let point = entities.iter().find(|e| e.name == "Point").unwrap();
        let properties: Vec<_> = entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();

        // Exactly two properties, in declaration order.
        assert_eq!(
            properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
            vec!["x", "y"]
        );
        assert!(properties
            .iter()
            .all(|property| property.parent_id.as_deref() == Some(point.id.as_str())));
        // The shared type annotation is expected on both bindings.
        assert_eq!(properties[0].content, "var x: Int");
        assert_eq!(properties[1].content, "var y: Int");
    }
492
    /// Checks the per-binding `content` of Swift property entities across
    /// several multi-binding shapes: individually typed bindings, mixed shared
    /// and individual types, generic types containing commas, initializer
    /// expressions, `let` constants, and semicolon-separated declarations.
    #[test]
    fn test_swift_multi_binding_property_content_is_per_binding() {
        // Each binding carries its own type annotation.
        let typed_code = r#"
struct Types {
 var x: Int, y: String
}
"#;
        let plugin = CodeParserPlugin;
        let typed_entities = plugin.extract_entities(typed_code, "Types.swift");
        let typed_properties: Vec<_> = typed_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert_eq!(typed_properties[0].content, "var x: Int");
        assert_eq!(typed_properties[1].content, "var y: String");

        // `x` has no annotation of its own and is expected to pick up `Int`.
        let mixed_code = r#"
struct Mixed {
 var x, y: Int, z: String
}
"#;
        let mixed_entities = plugin.extract_entities(mixed_code, "Mixed.swift");
        let mixed_properties: Vec<_> = mixed_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert_eq!(mixed_properties[0].content, "var x: Int");
        assert_eq!(mixed_properties[1].content, "var y: Int");
        assert_eq!(mixed_properties[2].content, "var z: String");

        // The comma inside the generic argument list must not split the binding.
        let generic_code = r#"
struct GenericTypes {
 var lookup: Dictionary<String, Int>, count: Int
}
"#;
        let generic_entities = plugin.extract_entities(generic_code, "GenericTypes.swift");
        let generic_properties: Vec<_> = generic_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert_eq!(
            generic_properties[0].content,
            "var lookup: Dictionary<String, Int>"
        );
        assert_eq!(generic_properties[1].content, "var count: Int");

        // Each binding keeps only its own initializer expression.
        let initializer_code = r#"
struct Initializers {
 var a = Foo(), b = Bar()
}
"#;
        let initializer_entities = plugin.extract_entities(initializer_code, "Initializers.swift");
        let initializer_properties: Vec<_> = initializer_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert!(initializer_properties[0].content.contains("Foo()"));
        assert!(!initializer_properties[0].content.contains("Bar()"));
        assert!(initializer_properties[1].content.contains("Bar()"));
        assert!(!initializer_properties[1].content.contains("Foo()"));

        // `let` bindings keep the `let` keyword in their content.
        let constants_code = r#"
struct Constants {
 let first, second, third: Int
}
"#;
        let constants_entities = plugin.extract_entities(constants_code, "Constants.swift");
        let constants_properties: Vec<_> = constants_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert_eq!(
            constants_properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
            vec!["first", "second", "third"]
        );
        assert_eq!(constants_properties[0].content, "let first: Int");
        assert_eq!(constants_properties[1].content, "let second: Int");
        assert_eq!(constants_properties[2].content, "let third: Int");

        // A second declaration after `;` on the same line is a separate property.
        let semicolon_code = r#"
struct Semicolons {
 var left, right: Int; var next: Int
}
"#;
        let semicolon_entities = plugin.extract_entities(semicolon_code, "Semicolons.swift");
        let semicolon_properties: Vec<_> = semicolon_entities
            .iter()
            .filter(|e| e.entity_type == "property")
            .collect();
        assert_eq!(semicolon_properties[0].content, "var left: Int");
        assert_eq!(semicolon_properties[1].content, "var right: Int");
        assert_eq!(semicolon_properties[2].content, "var next: Int");
    }
586
587 #[test]
588 fn test_swift_body_locals_not_extracted_as_properties() {
589 let code = r#"
590class Cache {
591 var stored: Int
592
593 var computed: Int {
594 let computedLocal = stored + 1
595 func computedNested() -> Int {
596 return computedLocal
597 }
598 return computedNested()
599 }
600
601 var explicit: Int {
602 get {
603 let getterLocal = stored
604 func getterNested() -> Int {
605 return getterLocal
606 }
607 return getterNested()
608 }
609 }
610
611 init(seed: Int) {
612 let initial = seed
613 self.stored = initial
614 }
615
616 func value() -> Int {
617 let doubled = stored * 2
618 var offset = doubled + 1
619 func nested() -> Int {
620 let insideNested = offset
621 return insideNested
622 }
623 return nested()
624 }
625
626 subscript(index: Int) -> Int {
627 let shifted = index + stored
628 func subscriptNested() -> Int {
629 return shifted
630 }
631 return subscriptNested()
632 }
633
634 deinit {
635 let closing = stored
636 _ = closing
637 }
638}
639"#;
640 let plugin = CodeParserPlugin;
641 let entities = plugin.extract_entities(code, "Cache.swift");
642 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
643
644 assert!(names.contains(&"Cache"), "got: {:?}", names);
645 assert!(names.contains(&"stored"), "got: {:?}", names);
646 assert!(names.contains(&"computed"), "got: {:?}", names);
647 assert!(names.contains(&"explicit"), "got: {:?}", names);
648 assert!(names.contains(&"init"), "got: {:?}", names);
649 assert!(names.contains(&"value"), "got: {:?}", names);
650 assert!(names.contains(&"computedNested"), "got: {:?}", names);
651 assert!(names.contains(&"getterNested"), "got: {:?}", names);
652 assert!(names.contains(&"nested"), "got: {:?}", names);
653 assert!(names.contains(&"subscriptNested"), "got: {:?}", names);
654 assert!(names.contains(&"subscript"), "got: {:?}", names);
655 assert!(names.contains(&"deinit"), "got: {:?}", names);
656 assert!(!names.contains(&"Int"), "got: {:?}", names);
657
658 for local in [
659 "computedLocal",
660 "getterLocal",
661 "initial",
662 "doubled",
663 "offset",
664 "insideNested",
665 "shifted",
666 "closing",
667 ] {
668 assert!(!names.contains(&local), "{local} should not be an entity. Got: {:?}", names);
669 }
670 }
671
672 #[test]
673 fn test_swift_suppressed_multi_binding_initializers_are_traversed() {
674 let code = r#"
675func outer() {
676 let a = { func innerA() -> Int { 1 } },
677 b = { func innerB() -> Int { 2 } }
678}
679"#;
680 let plugin = CodeParserPlugin;
681 let entities = plugin.extract_entities(code, "Locals.swift");
682 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
683
684 assert!(names.contains(&"outer"), "got: {:?}", names);
685 assert!(names.contains(&"innerA"), "got: {:?}", names);
686 assert!(names.contains(&"innerB"), "got: {:?}", names);
687 assert!(!names.contains(&"a"), "local binding should stay suppressed: {:?}", names);
688 assert!(!names.contains(&"b"), "local binding should stay suppressed: {:?}", names);
689 }
690
    /// A Swift struct whose body contains `#if`/`#else`/`#endif` blocks (plus
    /// braces inside strings and comments) must still be recovered as a
    /// top-level struct with all of its members parented to it, including
    /// both variants of the conditionally compiled property.
    #[test]
    fn test_swift_conditional_compilation_inside_struct() {
        let code = r#"
import ArgumentParser

public struct TuistCommand: AsyncParsableCommand {
 public init() {}

 public static var configuration: CommandConfiguration {
 let comment = "brace in string }"
 let multiline = """
 brace in multiline }
 escaped \"""
 """
 /* brace in comment } */
 CommandConfiguration(commandName: "tuist")
 }

 #if os(macOS)
 public static var groupedSubcommands: [ParsableCommand.Type] {
 [InstallCommand.self]
 }
 #else
 public static var groupedSubcommands: [ParsableCommand.Type] {
 []
 }
 #endif

 public func run() async throws {}
}
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "TuistCommand.swift");
        // Diagnostic dump of (name, type, parent id) triples on stderr.
        eprintln!(
            "Swift conditional entities: {:?}",
            entities
                .iter()
                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
                .collect::<Vec<_>>()
        );

        let command = entities
            .iter()
            .find(|e| e.name == "TuistCommand")
            .expect("Should recover TuistCommand struct");
        assert_eq!(command.entity_type, "struct");
        assert!(command.parent_id.is_none());

        // Renaming the struct must leave its structural hash unchanged.
        let renamed_code = code.replace("TuistCommand", "RenamedCommand");
        let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
        let renamed_command = renamed_entities
            .iter()
            .find(|e| e.name == "RenamedCommand")
            .expect("Should recover renamed command struct");
        assert_eq!(command.structural_hash, renamed_command.structural_hash);

        for member in ["init", "configuration", "run"] {
            let entity = entities
                .iter()
                .find(|e| e.name == member)
                .unwrap_or_else(|| panic!("Should find {member}"));
            assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
        }

        // One entity per `#if` / `#else` branch.
        let grouped_subcommands: Vec<_> = entities
            .iter()
            .filter(|e| e.name == "groupedSubcommands")
            .collect();
        assert_eq!(grouped_subcommands.len(), 2);
        assert!(grouped_subcommands
            .iter()
            .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
    }
764
    /// Swift classes that combine `#if` blocks with string literals whose
    /// interpolations contain brace characters must still be recovered as
    /// top-level classes with `tpl`, `dump` and `render` parented to them.
    /// Covers a plain string, a raw (`#"…"#`) string, a multi-line string and
    /// an interpolation containing a closure.
    #[test]
    fn test_swift_conditional_compilation_with_interpolated_brace_string() {
        let plugin = CodeParserPlugin;
        for (container_name, code) in [
            (
                "Config",
                r#"
class Config {
 let tpl = "prefix \("}") suffix"
#if DEBUG
 func dump() { print(tpl) }
#endif
 func render() -> String { return tpl }
}

struct Tail { let q: Int }
"#,
            ),
            (
                "RawConfig",
                // Needs `r##` because the Swift raw string itself contains `"#`.
                r##"
class RawConfig {
 let tpl = #"prefix \#("{") suffix"#
#if DEBUG
 func dump() { print(tpl) }
#endif
 func render() -> String { return tpl }
}
"##,
            ),
            (
                "MultilineConfig",
                r#"
class MultilineConfig {
 let tpl = """
 prefix \("}") suffix
 """
#if DEBUG
 func dump() { print(tpl) }
#endif
 func render() -> String { return tpl }
}
"#,
            ),
            (
                "ClosureConfig",
                r#"
class ClosureConfig {
 let tpl = "prefix \(["}"].map { $0 }.joined()) suffix"
#if DEBUG
 func dump() { print(tpl) }
#endif
 func render() -> String { return tpl }
}
"#,
            ),
        ] {
            // The file name is derived from the container under test.
            let file_path = format!("{container_name}.swift");
            let entities = plugin.extract_entities(code, &file_path);
            let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
            let container = entities
                .iter()
                .find(|e| e.name == container_name)
                .unwrap_or_else(|| {
                    panic!("Should recover {container_name}, got: {names:?}");
                });
            assert_eq!(container.entity_type, "class");
            assert!(container.parent_id.is_none());

            for member in ["tpl", "dump", "render"] {
                let entity = entities
                    .iter()
                    .find(|e| e.name == member)
                    .unwrap_or_else(|| {
                        panic!("Should find {member} in {container_name}, got: {names:?}");
                    });
                assert_eq!(entity.parent_id.as_deref(), Some(container.id.as_str()));
            }
        }
    }
845
    /// Extracts entities from an Elixir file and checks that the module, its
    /// `def`/`defp`/`defmacro` definitions and the protocol are found, and that
    /// a function defined in the module records a parent id.
    #[test]
    fn test_elixir_entity_extraction() {
        let code = r#"
defmodule MyApp.Accounts do
 def create_user(attrs) do
 %User{}
 |> User.changeset(attrs)
 |> Repo.insert()
 end

 defp validate(attrs) do
 # private helper
 :ok
 end

 defmacro is_admin(user) do
 quote do
 unquote(user).role == :admin
 end
 end

 defguard is_positive(x) when is_integer(x) and x > 0
end

defprotocol Printable do
 def to_string(data)
end

defimpl Printable, for: Integer do
 def to_string(i), do: Integer.to_string(i)
end
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "accounts.ex");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
        // Diagnostic dump of (name, type) pairs on stderr.
        eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());

        assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
        assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
        assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
        assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
        assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);

        // The presence of `create_user` was asserted above, so this lookup cannot fail.
        let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
        assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
    }
894
    /// Extracts entities from a Clojure file (only built with the
    /// `lang-clojure` feature) and checks the names produced for the various
    /// `def*` forms, that the `ns` form is not an entity, that `defmethod`
    /// names include their dispatch value, and that all ids are unique.
    #[test]
    #[cfg(feature = "lang-clojure")]
    fn test_clojure_entity_extraction() {
        let code = r#"
(ns my.app.core
 (:require [clojure.string :as str]))

(def my-var 42)

(def ^:private secret "hunter2")

(defonce connection (atom nil))

(defn greet
 "Returns a greeting string."
 [name]
 (str "Hello, " name "!"))

(defmacro unless [pred & body]
 `(when (not ~pred) ~@body))

(defprotocol Greeter
 (greet! [this name]))

(defrecord Person [name age])

(defmulti area :shape)

(defmethod area :circle [{:keys [radius]}]
 (* Math/PI radius radius))

(defmethod area :rectangle [{:keys [width height]}]
 (* width height))
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "core.clj");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        // Diagnostic dump of (name, type) pairs on stderr.
        eprintln!(
            "Clojure entities: {:?}",
            entities
                .iter()
                .map(|e| (&e.name, &e.entity_type))
                .collect::<Vec<_>>()
        );

        assert!(!names.contains(&"my.app.core"), "Should not extract ns form as entity, got: {:?}", names);
        assert!(names.contains(&"my-var"), "Should find def, got: {:?}", names);
        assert!(names.contains(&"secret"), "Should strip ^:private metadata from name, got: {:?}", names);
        assert!(names.contains(&"connection"), "Should find defonce, got: {:?}", names);
        assert!(names.contains(&"greet"), "Should find defn, got: {:?}", names);
        assert!(names.contains(&"unless"), "Should find defmacro, got: {:?}", names);
        assert!(names.contains(&"Greeter"), "Should find defprotocol, got: {:?}", names);
        assert!(names.contains(&"Person"), "Should find defrecord, got: {:?}", names);
        assert!(names.contains(&"area"), "Should find defmulti, got: {:?}", names);
        // Each `defmethod` is named `<multi>/<dispatch value>`.
        assert!(names.contains(&"area/:circle"), "Should find defmethod area :circle, got: {:?}", names);
        assert!(names.contains(&"area/:rectangle"), "Should find defmethod area :rectangle, got: {:?}", names);
        // Deduplicating the ids through a set must not shrink the list.
        let ids: Vec<&str> = entities.iter().map(|e| e.id.as_str()).collect();
        assert!(ids.iter().collect::<std::collections::HashSet<_>>().len() == ids.len(),
            "All entity IDs must be unique, got: {:?}", ids);
    }
956
    /// A Clojure `defn-` form must be extracted as an entity of type
    /// `function` (only built with the `lang-clojure` feature).
    #[test]
    #[cfg(feature = "lang-clojure")]
    fn test_clojure_defn_private() {
        let code = r#"
(ns my.app)

(defn- private-helper [x]
 (* x 2))
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "app.clj");
        let entity = entities
            .iter()
            .find(|e| e.name == "private-helper")
            .expect("Should extract defn- as a function entity");
        assert_eq!(entity.entity_type, "function");
    }
974
    /// Clojure function names ending in `?` or `!` must be extracted with the
    /// punctuation intact and typed as `function` (only built with the
    /// `lang-clojure` feature).
    #[test]
    #[cfg(feature = "lang-clojure")]
    fn test_clojure_predicate_and_bang_functions() {
        let code = r#"
(ns my.app.validators)

(defn empty? [coll]
 (= 0 (count coll)))

(defn reset! [state new-val]
 (compare-and-set! state @state new-val))
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "validators.clj");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"empty?"), "Should extract predicate fn empty?, got: {:?}", names);
        assert!(names.contains(&"reset!"), "Should extract bang fn reset!, got: {:?}", names);
        // Presence was asserted above, so these lookups cannot fail.
        let empty_entity = entities.iter().find(|e| e.name == "empty?").unwrap();
        let reset_entity = entities.iter().find(|e| e.name == "reset!").unwrap();
        assert_eq!(empty_entity.entity_type, "function");
        assert_eq!(reset_entity.entity_type, "function");
    }
997
    /// Clojure names containing `*` or `=` must be extracted verbatim: the
    /// earmuffed `*db*` as a `var` and `not=` as a `function` (only built with
    /// the `lang-clojure` feature).
    #[test]
    #[cfg(feature = "lang-clojure")]
    fn test_clojure_dynamic_vars_and_equality_fns() {
        let code = r#"
(ns my.app.core)

(def *db* (atom nil))

(defn not= [a b]
 (not (= a b)))
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "core.clj");
        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"*db*"), "Should extract dynamic var *db*, got: {:?}", names);
        assert!(names.contains(&"not="), "Should extract fn not=, got: {:?}", names);
        // Presence was asserted above, so these lookups cannot fail.
        let db_entity = entities.iter().find(|e| e.name == "*db*").unwrap();
        let noteq_entity = entities.iter().find(|e| e.name == "not=").unwrap();
        assert_eq!(db_entity.entity_type, "var");
        assert_eq!(noteq_entity.entity_type, "function");
    }
1019
    /// Clojure `deftype`, `definterface` and `defstruct` forms must be
    /// extracted with entity types `type`, `interface` and `struct`
    /// respectively (only built with the `lang-clojure` feature).
    #[test]
    #[cfg(feature = "lang-clojure")]
    fn test_clojure_deftype_definterface_defstruct() {
        let code = r#"
(ns my.app)

(deftype MyType [field])

(definterface IFoo
 (foo [this]))

(defstruct point :x :y)
"#;
        let plugin = CodeParserPlugin;
        let entities = plugin.extract_entities(code, "app.clj");
        // Lookup helper: first entity with the given name, if any.
        let by_name = |name: &str| entities.iter().find(|e| e.name == name);

        assert!(by_name("MyType").is_some(), "Should extract deftype, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
        assert_eq!(by_name("MyType").unwrap().entity_type, "type");

        assert!(by_name("IFoo").is_some(), "Should extract definterface, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
        assert_eq!(by_name("IFoo").unwrap().entity_type, "interface");

        assert!(by_name("point").is_some(), "Should extract defstruct, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
        assert_eq!(by_name("point").unwrap().entity_type, "struct");
    }
1046
1047 #[test]
1048 #[cfg(feature = "lang-clojure")]
1049 fn test_clojure_cljc_extension() {
1050 let code = r#"
1051(ns my.app.shared)
1052
1053(defn platform-key [] :default)
1054
1055(def shared-value 99)
1056"#;
1057 let plugin = CodeParserPlugin;
1058 let entities = plugin.extract_entities(code, "shared.cljc");
1059 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1060 assert!(names.contains(&"platform-key"), "Should extract defn from .cljc, got: {:?}", names);
1061 assert!(names.contains(&"shared-value"), "Should extract def from .cljc, got: {:?}", names);
1062 }
1063
1064 #[test]
1065 #[cfg(feature = "lang-clojure")]
1066 fn test_clojure_defmethod_non_keyword_dispatch() {
1067 let code = r#"
1068(ns my.app)
1069
1070(defmulti process identity)
1071
1072(defmethod process nil [_] :nothing)
1073
1074(defmethod process "string" [s] s)
1075
1076(defmethod process 42 [n] n)
1077"#;
1078 let plugin = CodeParserPlugin;
1079 let entities = plugin.extract_entities(code, "app.clj");
1080 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1081 assert!(names.contains(&"process"), "Should extract defmulti, got: {:?}", names);
1082 assert!(names.contains(&"process/nil"), "Should extract defmethod with nil dispatch, got: {:?}", names);
1083 assert!(names.contains(&"process/\"string\""), "Should extract defmethod with string dispatch, got: {:?}", names);
1084 assert!(names.contains(&"process/42"), "Should extract defmethod with integer dispatch, got: {:?}", names);
1085 let ids: Vec<&str> = entities.iter().map(|e| e.id.as_str()).collect();
1086 assert!(
1087 ids.iter().collect::<std::collections::HashSet<_>>().len() == ids.len(),
1088 "All entity IDs must be unique, got: {:?}", ids
1089 );
1090 }
1091
1092 #[test]
1093 fn test_bash_entity_extraction() {
1094 let code = r#"#!/bin/bash
1095
1096greet() {
1097 echo "Hello, $1!"
1098}
1099
1100function deploy {
1101 echo "deploying..."
1102}
1103
1104# not a function
1105echo "main script"
1106"#;
1107 let plugin = CodeParserPlugin;
1108 let entities = plugin.extract_entities(code, "deploy.sh");
1109 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1110 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1111 eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1112
1113 assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
1114 assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
1115 assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
1116 }
1117
1118 #[test]
1119 fn test_typescript_entity_extraction() {
1120 let code = r#"
1122export function hello(): string {
1123 return "hello";
1124}
1125
1126export class Greeter {
1127 greet(name: string): string {
1128 return `Hello, ${name}!`;
1129 }
1130}
1131"#;
1132 let plugin = CodeParserPlugin;
1133 let entities = plugin.extract_entities(code, "test.ts");
1134 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1135 assert!(names.contains(&"hello"), "Should find hello function");
1136 assert!(names.contains(&"Greeter"), "Should find Greeter class");
1137 }
1138
1139 #[test]
1140 fn test_same_line_typescript_overload_ids_are_unique() {
1141 let code = "function f(a: number): void {}; function f(a: string): void {}\n";
1142 let plugin = CodeParserPlugin;
1143 let entities = plugin.extract_entities(code, "over.ts");
1144 let overloads: Vec<&SemanticEntity> = entities
1145 .iter()
1146 .filter(|entity| entity.name == "f" && entity.entity_type == "function")
1147 .collect();
1148 let ids: Vec<&str> = overloads.iter().map(|entity| entity.id.as_str()).collect();
1149
1150 assert_eq!(overloads.len(), 2, "expected both overloads, got: {entities:?}");
1151 assert_eq!(ids, vec!["over.ts::function::f@L1#1", "over.ts::function::f@L1#2"]);
1152 }
1153
1154 #[test]
1155 fn test_same_line_duplicate_parent_ids_are_propagated_to_children() {
1156 let code = "class C { m(){ return 1 } } class C { m(){ return 2 } }\n";
1157 let plugin = CodeParserPlugin;
1158 let entities = plugin.extract_entities(code, "c.ts");
1159 let classes: Vec<&SemanticEntity> = entities
1160 .iter()
1161 .filter(|entity| entity.name == "C" && entity.entity_type == "class")
1162 .collect();
1163 let methods: Vec<&SemanticEntity> = entities
1164 .iter()
1165 .filter(|entity| entity.name == "m" && entity.entity_type == "method")
1166 .collect();
1167
1168 assert_eq!(classes.len(), 2, "expected both classes, got: {entities:?}");
1169 assert_eq!(methods.len(), 2, "expected both methods, got: {entities:?}");
1170 assert_eq!(classes[0].id, "c.ts::class::C@L1#1");
1171 assert_eq!(classes[1].id, "c.ts::class::C@L1#2");
1172 assert_eq!(methods[0].parent_id.as_deref(), Some("c.ts::class::C@L1#1"));
1173 assert_eq!(methods[1].parent_id.as_deref(), Some("c.ts::class::C@L1#2"));
1174 assert_eq!(methods[0].id, "c.ts::class::C@L1#1::m");
1175 assert_eq!(methods[1].id, "c.ts::class::C@L1#2::m");
1176 }
1177
1178 #[test]
1179 fn test_module_typescript_entity_extraction() {
1180 let code = r#"
1181export function hello(): string {
1182 return "hello";
1183}
1184"#;
1185 let plugin = CodeParserPlugin;
1186 let entities = plugin.extract_entities(code, "test.mts");
1187 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1188
1189 assert!(names.contains(&"hello"), "Should find hello function");
1190 }
1191
1192 #[test]
1193 fn test_commonjs_typescript_entity_extraction() {
1194 let code = r#"
1195export class Greeter {
1196 greet(name: string): string {
1197 return `Hello, ${name}!`;
1198 }
1199}
1200"#;
1201 let plugin = CodeParserPlugin;
1202 let entities = plugin.extract_entities(code, "test.cts");
1203 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1204
1205 assert!(names.contains(&"Greeter"), "Should find Greeter class");
1206 assert!(names.contains(&"greet"), "Should find greet method");
1207 }
1208
1209 #[test]
1210 fn test_typescript_generator_function_entity_extraction() {
1211 let code = r#"
1212export async function* streamUsers(): AsyncGenerator<string> {
1213 yield "alice";
1214}
1215"#;
1216 let plugin = CodeParserPlugin;
1217 let entities = plugin.extract_entities(code, "stream.ts");
1218 let stream = entities.iter().find(|e| e.name == "streamUsers");
1219
1220 assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1221 assert_eq!(stream.unwrap().entity_type, "function");
1222 }
1223
1224 #[test]
1225 fn test_javascript_generator_function_entity_extraction() {
1226 let code = r#"
1227export function* ids() {
1228 yield 1;
1229 yield 2;
1230}
1231"#;
1232 let plugin = CodeParserPlugin;
1233 let entities = plugin.extract_entities(code, "ids.js");
1234 let ids = entities.iter().find(|e| e.name == "ids");
1235
1236 assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1237 assert_eq!(ids.unwrap().entity_type, "function");
1238 }
1239
1240 #[test]
1241 fn test_nested_functions_typescript() {
1242 let code = r#"
1243function outer() {
1244 function inner() {
1245 return 42;
1246 }
1247 return inner();
1248}
1249"#;
1250 let plugin = CodeParserPlugin;
1251 let entities = plugin.extract_entities(code, "nested.ts");
1252 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1253 eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1254
1255 assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
1256 assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
1257
1258 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1259 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1260 }
1261
1262 #[test]
1263 fn test_typescript_nested_anonymous_class_fields() {
1264 let code = r#"
1265class L1 {
1266 L2 = class {
1267 L3 = class {
1268 L4 = class {
1269 method() { return 1; }
1270 };
1271 };
1272 };
1273}
1274"#;
1275 let plugin = CodeParserPlugin;
1276 let entities = plugin.extract_entities(code, "a.ts");
1277 let find = |name: &str| {
1278 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1279 panic!(
1280 "missing {name}; got: {:?}",
1281 entities
1282 .iter()
1283 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1284 .collect::<Vec<_>>()
1285 )
1286 })
1287 };
1288
1289 let l1 = find("L1");
1290 assert_eq!(l1.entity_type, "class");
1291 let l1_id = l1.id.clone();
1292
1293 let l2 = find("L2");
1294 assert_eq!(l2.entity_type, "field");
1295 assert_eq!(l2.parent_id.as_deref(), Some(l1_id.as_str()));
1296 let l2_id = l2.id.clone();
1297
1298 let l3 = find("L3");
1299 assert_eq!(l3.entity_type, "field");
1300 assert_eq!(l3.parent_id.as_deref(), Some(l2_id.as_str()));
1301 let l3_id = l3.id.clone();
1302
1303 let l4 = find("L4");
1304 assert_eq!(l4.entity_type, "field");
1305 assert_eq!(l4.parent_id.as_deref(), Some(l3_id.as_str()));
1306 let l4_id = l4.id.clone();
1307
1308 let method = find("method");
1309 assert_eq!(method.entity_type, "method");
1310 assert_eq!(method.parent_id.as_deref(), Some(l4_id.as_str()));
1311 assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
1312 }
1313
1314 #[test]
1315 fn test_nested_functions_python() {
1316 let code = "def outer():\n def inner():\n return 42\n return inner()\n";
1317 let plugin = CodeParserPlugin;
1318 let entities = plugin.extract_entities(code, "nested.py");
1319 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1320
1321 assert!(names.contains(&"outer"), "got: {:?}", names);
1322 assert!(names.contains(&"inner"), "got: {:?}", names);
1323
1324 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1325 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1326 }
1327
1328 #[test]
1329 fn test_nested_functions_rust() {
1330 let code = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
1331 let plugin = CodeParserPlugin;
1332 let entities = plugin.extract_entities(code, "nested.rs");
1333 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1334
1335 assert!(names.contains(&"outer"), "got: {:?}", names);
1336 assert!(names.contains(&"inner"), "got: {:?}", names);
1337
1338 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1339 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1340 }
1341
1342 #[test]
1343 fn test_rust_impl_blocks_unique_names() {
1344 let code = r#"
1345trait Greeting {
1346 fn greet(&self) -> String;
1347}
1348
1349struct Person;
1350struct Robot;
1351struct Cat;
1352
1353impl Greeting for Person {
1354 fn greet(&self) -> String { "Hello".to_string() }
1355}
1356
1357impl Greeting for Robot {
1358 fn greet(&self) -> String { "Beep".to_string() }
1359}
1360
1361impl Greeting for Cat {
1362 fn greet(&self) -> String { "Meow".to_string() }
1363}
1364"#;
1365 let plugin = CodeParserPlugin;
1366 let entities = plugin.extract_entities(code, "impls.rs");
1367 let impl_entities: Vec<&_> = entities.iter()
1368 .filter(|e| e.entity_type == "impl")
1369 .collect();
1370 let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
1371
1372 assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
1373 assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
1374 assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
1375 assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
1376 }
1377
1378 #[test]
1379 fn test_nested_functions_go() {
1380 let code = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
1382 let plugin = CodeParserPlugin;
1383 let entities = plugin.extract_entities(code, "nested.go");
1384 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1385
1386 assert!(names.contains(&"outer"), "got: {:?}", names);
1387 }
1388
1389 #[test]
1390 fn test_renamed_function_same_structural_hash() {
1391 let code_a = "def get_card():\n return db.query('cards')\n";
1392 let code_b = "def get_card_1():\n return db.query('cards')\n";
1393
1394 let plugin = CodeParserPlugin;
1395 let entities_a = plugin.extract_entities(code_a, "a.py");
1396 let entities_b = plugin.extract_entities(code_b, "b.py");
1397
1398 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1399 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1400 assert_eq!(entities_a[0].name, "get_card");
1401 assert_eq!(entities_b[0].name, "get_card_1");
1402
1403 assert_eq!(
1405 entities_a[0].structural_hash, entities_b[0].structural_hash,
1406 "Renamed function with identical body should have same structural_hash"
1407 );
1408
1409 assert_ne!(
1411 entities_a[0].content_hash, entities_b[0].content_hash,
1412 "Content hash should differ since raw content includes the name"
1413 );
1414 }
1415
1416 #[test]
1417 fn test_swift_renamed_operator_same_structural_hash() {
1418 let plugin = CodeParserPlugin;
1419 let entities_a = plugin.extract_entities("prefix operator ~~~\n", "a.swift");
1420 let entities_b = plugin.extract_entities("prefix operator !!!\n", "b.swift");
1421
1422 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1423 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1424 assert_eq!(entities_a[0].name, "~~~");
1425 assert_eq!(entities_b[0].name, "!!!");
1426 assert_eq!(entities_a[0].entity_type, "operator");
1427 assert_eq!(entities_b[0].entity_type, "operator");
1428 assert_eq!(
1429 entities_a[0].structural_hash, entities_b[0].structural_hash,
1430 "Renamed operator with otherwise identical declaration should have same structural_hash"
1431 );
1432 assert_ne!(
1433 entities_a[0].content_hash, entities_b[0].content_hash,
1434 "Content hash should differ since raw content includes the operator token"
1435 );
1436 }
1437
1438 #[test]
1439 fn test_swift_synthesized_names_disambiguate_overloads() {
1440 let plugin = CodeParserPlugin;
1441 let code = r#"
1442struct Matrix {
1443 subscript(row: Int) -> Double {
1444 return Double(row)
1445 }
1446
1447 subscript(row: Int, column: Int) -> Double {
1448 return Double(row + column)
1449 }
1450}
1451
1452class Builder {
1453 init(value: Int) {}
1454 init(text: String) {}
1455}
1456"#;
1457
1458 let entities = plugin.extract_entities(code, "Overloads.swift");
1459
1460 let subscript_ids: Vec<&str> = entities
1461 .iter()
1462 .filter(|e| e.entity_type == "subscript")
1463 .map(|e| e.id.as_str())
1464 .collect();
1465 assert_eq!(subscript_ids.len(), 2);
1466 assert_ne!(subscript_ids[0], subscript_ids[1]);
1467 assert!(subscript_ids.iter().all(|id| id.contains("@L")));
1468
1469 let init_ids: Vec<&str> = entities
1470 .iter()
1471 .filter(|e| e.entity_type == "init")
1472 .map(|e| e.id.as_str())
1473 .collect();
1474 assert_eq!(init_ids.len(), 2);
1475 assert_ne!(init_ids[0], init_ids[1]);
1476 assert!(init_ids.iter().all(|id| id.contains("@L")));
1477 }
1478
1479 #[test]
1480 fn test_hcl_entity_extraction() {
1481 let code = r#"
1482region = "eu-west-1"
1483
1484variable "image_id" {
1485 type = string
1486}
1487
1488resource "aws_instance" "web" {
1489 ami = var.image_id
1490
1491 lifecycle {
1492 create_before_destroy = true
1493 }
1494}
1495"#;
1496 let plugin = CodeParserPlugin;
1497 let entities = plugin.extract_entities(code, "main.tf");
1498 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1499 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1500 eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1501
1502 assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
1503 assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
1504 assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
1505 assert!(
1506 names.contains(&"resource.aws_instance.web.lifecycle"),
1507 "Should find nested lifecycle block with qualified name, got: {:?}",
1508 names
1509 );
1510 assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
1511 assert!(
1512 !names.contains(&"create_before_destroy"),
1513 "Should skip nested attributes inside nested blocks, got: {:?}",
1514 names
1515 );
1516
1517 let lifecycle = entities
1518 .iter()
1519 .find(|e| e.name == "resource.aws_instance.web.lifecycle")
1520 .unwrap();
1521 assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
1522 assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
1523 }
1524
1525 #[test]
1526 fn test_kotlin_entity_extraction() {
1527 let code = r#"
1528class UserService {
1529 val name: String = ""
1530
1531 fun greet(): String {
1532 return "Hello, $name"
1533 }
1534
1535 companion object {
1536 fun create(): UserService = UserService()
1537 }
1538}
1539
1540interface Repository {
1541 fun findById(id: Int): Any?
1542}
1543
1544object AppConfig {
1545 val version = "1.0"
1546}
1547
1548fun topLevel(x: Int): Int = x * 2
1549"#;
1550 let plugin = CodeParserPlugin;
1551 let entities = plugin.extract_entities(code, "App.kt");
1552 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1553 eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1554 assert!(names.contains(&"UserService"), "got: {:?}", names);
1555 assert!(names.contains(&"greet"), "got: {:?}", names);
1556 assert!(names.contains(&"Repository"), "got: {:?}", names);
1557 assert!(names.contains(&"findById"), "got: {:?}", names);
1558 assert!(names.contains(&"AppConfig"), "got: {:?}", names);
1559 assert!(names.contains(&"topLevel"), "got: {:?}", names);
1560 }
1561
1562 #[test]
1563 fn test_xml_entity_extraction() {
1564 let code = r#"<?xml version="1.0" encoding="UTF-8"?>
1565<project>
1566 <groupId>com.example</groupId>
1567 <artifactId>my-app</artifactId>
1568 <dependencies>
1569 <dependency>
1570 <groupId>junit</groupId>
1571 <artifactId>junit</artifactId>
1572 </dependency>
1573 </dependencies>
1574 <build>
1575 <plugins>
1576 <plugin>
1577 <groupId>org.apache.maven</groupId>
1578 </plugin>
1579 </plugins>
1580 </build>
1581</project>
1582"#;
1583 let plugin = CodeParserPlugin;
1584 let entities = plugin.extract_entities(code, "pom.xml");
1585 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1586 eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1587 assert!(names.contains(&"project"), "got: {:?}", names);
1588 assert!(names.contains(&"dependencies"), "got: {:?}", names);
1589 assert!(names.contains(&"build"), "got: {:?}", names);
1590 }
1591
1592 #[test]
1593 fn test_arrow_callback_scope_boundary_typescript() {
1594 let code = r#"
1598const activeQueues = [
1599 { queue: queues.fooQueue, processor: foo.process },
1600];
1601
1602activeQueues.forEach((handler: any) => {
1603 const queue = handler.queue;
1604 let retries = 0;
1605
1606 class QueueHandler {
1607 handle() { return queue; }
1608 }
1609
1610 function createHandler() {
1611 return new QueueHandler();
1612 }
1613
1614 queue.process((job) => {
1615 const orderId = job.data.orderId;
1616 return orderId;
1617 });
1618});
1619
1620function handleFailure(job: any, err: any) {
1621 console.error('failed', err);
1622}
1623"#;
1624 let plugin = CodeParserPlugin;
1625 let entities = plugin.extract_entities(code, "process.ts");
1626 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1627 let top_level: Vec<&str> = entities
1628 .iter()
1629 .filter(|e| e.parent_id.is_none())
1630 .map(|e| e.name.as_str())
1631 .collect();
1632
1633 assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
1635 assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
1636
1637 assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
1639 assert!(names.contains(&"handle"), "got: {:?}", names);
1640 assert!(names.contains(&"createHandler"), "got: {:?}", names);
1641
1642 assert!(!names.contains(&"queue"), "got: {:?}", names);
1644 assert!(!names.contains(&"retries"), "got: {:?}", names);
1645 assert!(!names.contains(&"orderId"), "got: {:?}", names);
1646 }
1647
1648 #[test]
1649 fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
1650 let code = r#"
1651function factory() {
1652 class Foo {
1653 method(): number {
1654 return 1;
1655 }
1656 }
1657
1658 function bar(): Foo {
1659 return new Foo();
1660 }
1661}
1662
1663factory();
1664"#;
1665 let plugin = CodeParserPlugin;
1666 let entities = plugin.extract_entities(code, "wrapped.ts");
1667 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1668 assert!(
1669 names.contains(&"factory"),
1670 "Should find top-level wrapper function, got: {:?}",
1671 names
1672 );
1673 assert!(
1674 names.contains(&"Foo"),
1675 "Should find class inside top-level wrapper, got: {:?}",
1676 names
1677 );
1678 assert!(
1679 names.contains(&"bar"),
1680 "Should find function inside top-level wrapper, got: {:?}",
1681 names
1682 );
1683 }
1684
1685 #[test]
1686 fn test_top_level_iife_still_extracts_typescript_entities() {
1687 let code = r#"
1688(() => {
1689 class Foo {
1690 method(): number {
1691 return 1;
1692 }
1693 }
1694
1695 function bar(): Foo {
1696 return new Foo();
1697 }
1698})();
1699"#;
1700 let plugin = CodeParserPlugin;
1701 let entities = plugin.extract_entities(code, "iife.ts");
1702 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1703 assert!(
1704 names.contains(&"Foo"),
1705 "Should find class inside top-level IIFE, got: {:?}",
1706 names
1707 );
1708 assert!(
1709 names.contains(&"bar"),
1710 "Should find function inside top-level IIFE, got: {:?}",
1711 names
1712 );
1713 }
1714
1715 #[test]
1716 fn test_function_locals_not_extracted_as_nested_entities_typescript() {
1717 let code = r#"
1718export default function foo() {
1719 const x = 1;
1720 return x;
1721}
1722"#;
1723 let plugin = CodeParserPlugin;
1724 let entities = plugin.extract_entities(code, "default-export.ts");
1725 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1726 assert!(
1727 names.contains(&"foo"),
1728 "Should find exported function, got: {:?}",
1729 names
1730 );
1731 assert!(
1732 !names.contains(&"x"),
1733 "Local inside function should not be extracted as an entity, got: {:?}",
1734 names
1735 );
1736 }
1737
1738 #[test]
1739 fn test_function_expression_scope_boundary_typescript() {
1740 let code = r#"
1743const foo = function namedExpr(x: number) {
1744 const inner = x + 1;
1745 return inner;
1746};
1747
1748const bar = function(y: number) {
1749 const local = y * 2;
1750 return local;
1751};
1752
1753const items = [1, 2, 3];
1754
1755items.forEach(function process(item) {
1756 const doubled = item * 2;
1757 console.log(doubled);
1758});
1759"#;
1760 let plugin = CodeParserPlugin;
1761 let entities = plugin.extract_entities(code, "funexpr.ts");
1762 let top_level: Vec<&str> = entities
1763 .iter()
1764 .filter(|e| e.parent_id.is_none())
1765 .map(|e| e.name.as_str())
1766 .collect();
1767 let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
1768 let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1769
1770 assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
1773 assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
1774 assert!(top_level.contains(&"items"), "got: {:?}", top_level);
1775 assert_eq!(find("foo").entity_type, "function");
1776 assert_eq!(find("bar").entity_type, "function");
1777 assert_eq!(find("items").entity_type, "variable");
1778
1779 assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
1781 assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
1782 assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
1783
1784 assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
1786 }
1787
1788 #[test]
1789 fn test_variable_assigned_arrow_extracts_inner_entities() {
1790 let code = r#"
1793const handler = () => {
1794 class Inner {
1795 run() { return 1; }
1796 }
1797
1798 function make() {
1799 return new Inner();
1800 }
1801
1802 const local = 42;
1803};
1804"#;
1805 let plugin = CodeParserPlugin;
1806 let entities = plugin.extract_entities(code, "assigned.ts");
1807 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1808 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1809
1810 assert_eq!(handler.entity_type, "function");
1811 assert!(names.contains(&"handler"), "got: {:?}", names);
1812 assert!(names.contains(&"Inner"), "got: {:?}", names);
1813 assert!(names.contains(&"run"), "got: {:?}", names);
1814 assert!(names.contains(&"make"), "got: {:?}", names);
1815 assert!(!names.contains(&"local"), "got: {:?}", names);
1816 }
1817
1818 #[test]
1819 fn test_variable_assigned_function_expression_extracts_inner_entities() {
1820 let code = r#"
1822const handler = function() {
1823 class Inner {}
1824 function make() { return new Inner(); }
1825 const local = 42;
1826};
1827"#;
1828 let plugin = CodeParserPlugin;
1829 let entities = plugin.extract_entities(code, "funexpr-inner.ts");
1830 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1831 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1832
1833 assert_eq!(handler.entity_type, "function");
1834 assert!(names.contains(&"handler"), "got: {:?}", names);
1835 assert!(names.contains(&"Inner"), "got: {:?}", names);
1836 assert!(names.contains(&"make"), "got: {:?}", names);
1837 assert!(!names.contains(&"local"), "got: {:?}", names);
1838 }
1839
1840 #[test]
1841 fn test_let_assigned_arrow_stays_variable_typescript() {
1842 let code = r#"
1843let handler = () => {
1844 return 42;
1845};
1846"#;
1847 let plugin = CodeParserPlugin;
1848 let entities = plugin.extract_entities(code, "let-assigned.ts");
1849 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1850
1851 assert_eq!(handler.entity_type, "variable");
1852 }
1853
1854 #[test]
1855 fn test_const_assigned_arrow_promoted_to_function_javascript() {
1856 let code = r#"
1857const handler = () => {
1858 return 42;
1859};
1860"#;
1861 let plugin = CodeParserPlugin;
1862 let entities = plugin.extract_entities(code, "handler.js");
1863 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1864
1865 assert_eq!(handler.entity_type, "function");
1866 }
1867
1868 #[test]
1869 fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
1870 let code = r#"
1871const value = 1, handler = () => value;
1872const first = () => 1, second = 2;
1873"#;
1874 let plugin = CodeParserPlugin;
1875 let entities = plugin.extract_entities(code, "sample.ts");
1876 let find = |name: &str| {
1877 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1878 panic!(
1879 "missing {name}; got: {:?}",
1880 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
1881 )
1882 })
1883 };
1884
1885 assert_eq!(find("value").entity_type, "variable");
1886 assert_eq!(find("handler").entity_type, "function");
1887 assert_eq!(find("first").entity_type, "function");
1888 assert_eq!(find("second").entity_type, "variable");
1889 }
1890
1891 #[test]
1892 fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
1893 let code = r#"
1894function wrapper() {
1895 const holder = class {
1896 run() { return 1; }
1897 }, handler = () => {
1898 class Inner {
1899 go() { return 2; }
1900 }
1901 }, value = 1;
1902}
1903"#;
1904 let plugin = CodeParserPlugin;
1905 let entities = plugin.extract_entities(code, "sample.ts");
1906 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1907 let find = |name: &str| {
1908 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1909 panic!(
1910 "missing {name}; got: {:?}",
1911 entities
1912 .iter()
1913 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1914 .collect::<Vec<_>>()
1915 )
1916 })
1917 };
1918
1919 assert_eq!(find("wrapper").entity_type, "function");
1920 assert_eq!(find("handler").entity_type, "function");
1921 assert!(names.contains(&"run"), "got: {:?}", names);
1922 assert!(names.contains(&"Inner"), "got: {:?}", names);
1923 assert!(names.contains(&"go"), "got: {:?}", names);
1924 assert!(!names.contains(&"holder"), "got: {:?}", names);
1925 assert!(!names.contains(&"value"), "got: {:?}", names);
1926 }
1927
1928 #[test]
1929 fn test_go_var_declaration() {
1930 let code = r#"package featuremgmt
1931
1932type FeatureFlag struct {
1933 Name string
1934 Description string
1935 Stage string
1936}
1937
1938var standardFeatureFlags = []FeatureFlag{
1939 {
1940 Name: "panelTitleSearch",
1941 Description: "Search for dashboards using panel title",
1942 Stage: "PublicPreview",
1943 },
1944}
1945
1946func GetFlags() []FeatureFlag {
1947 return standardFeatureFlags
1948}
1949"#;
1950 let plugin = CodeParserPlugin;
1951 let entities = plugin.extract_entities(code, "flags.go");
1952 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1953 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1954 eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1955
1956 assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
1957 assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
1958 assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
1959 }
1960
1961 #[test]
1962 fn test_go_grouped_var_declaration() {
1963 let code = r#"package test
1964
1965var (
1966 simple = 42
1967 flags = []string{"a", "b"}
1968)
1969
1970const (
1971 x = 1
1972 y = 2
1973)
1974
1975func main() {}
1976"#;
1977 let plugin = CodeParserPlugin;
1978 let entities = plugin.extract_entities(code, "test.go");
1979 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1980 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1981 eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1982
1983 assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1984 assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1985 assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1986 }
1987
1988 #[test]
1989 fn test_dart_entity_extraction() {
1990 let code = r#"
1991import 'dart:math';
1992
1993class Calculator {
1994 final String name;
1995
1996 Calculator(this.name);
1997
1998 Calculator.withDefault() : name = 'default';
1999
2000 factory Calculator.create(String name) {
2001 return Calculator(name);
2002 }
2003
2004 int add(int a, int b) {
2005 return a + b;
2006 }
2007
2008 int get doubleAdd => add(1, 1) * 2;
2009
2010 set label(String value) {
2011 // no-op
2012 }
2013
2014 int operator +(Calculator other) {
2015 return 0;
2016 }
2017}
2018
2019mixin Loggable {
2020 void log(String message) {
2021 print(message);
2022 }
2023}
2024
2025extension StringExt on String {
2026 bool get isBlank => trim().isEmpty;
2027}
2028
2029enum Status {
2030 active,
2031 inactive;
2032
2033 String display() => name.toUpperCase();
2034}
2035
2036typedef Callback = void Function(int);
2037
2038int add(int a, int b) {
2039 return a + b;
2040}
2041
2042extension type Wrapper(int value) implements int {}
2043"#;
2044 let plugin = CodeParserPlugin;
2045 let entities = plugin.extract_entities(code, "calculator.dart");
2046 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2047 eprintln!(
2048 "Dart entities: {:?}",
2049 entities
2050 .iter()
2051 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2052 .collect::<Vec<_>>()
2053 );
2054
2055 assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
2057 assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
2058 assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
2059 assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
2060 assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
2061 assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
2062 assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
2063
2064 let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
2066 assert!(add_method.is_some(), "Should find add method inside Calculator");
2067 assert_eq!(add_method.unwrap().entity_type, "method");
2068
2069 let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
2071 assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
2072 let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
2073 assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
2074 assert_eq!(named_ctor.unwrap().entity_type, "constructor");
2075 assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
2076
2077 let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
2079 assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
2080 assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
2081
2082 let getter = entities.iter().find(|e| e.name == "doubleAdd");
2084 assert!(getter.is_some(), "Should find getter doubleAdd");
2085 assert_eq!(getter.unwrap().entity_type, "getter");
2086
2087 let setter = entities.iter().find(|e| e.name == "label");
2088 assert!(setter.is_some(), "Should find setter label");
2089 assert_eq!(setter.unwrap().entity_type, "setter");
2090
2091 let operator = entities.iter().find(|e| e.name == "operator +");
2092 assert!(operator.is_some(), "Should find operator +");
2093 assert_eq!(operator.unwrap().entity_type, "method");
2094
2095 let log_method = entities.iter().find(|e| e.name == "log");
2097 assert!(log_method.is_some(), "Should find log in Loggable");
2098 assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
2099
2100 let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
2102 assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
2103
2104 let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
2105 assert_eq!(loggable.entity_type, "mixin");
2106
2107 let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
2108 assert_eq!(ext.entity_type, "extension");
2109
2110 let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
2111 assert_eq!(wrapper.entity_type, "extension");
2112 }
2113
2114 #[test]
2115 #[cfg(feature = "lang-sql")]
2116 fn test_sql_entity_extraction() {
2117 let code = r#"
2118CREATE TABLE users (id INT PRIMARY KEY, name TEXT);
2119CREATE VIEW active_users AS SELECT * FROM users WHERE active;
2120CREATE FUNCTION add(a INT, b INT) RETURNS INT AS $$ BEGIN RETURN a + b; END; $$ LANGUAGE plpgsql;
2121CREATE INDEX idx_name ON users(name);
2122CREATE TYPE mood AS ENUM ('sad', 'happy');
2123CREATE SCHEMA myapp;
2124CREATE MATERIALIZED VIEW mv AS SELECT 1;
2125CREATE TABLE billing.invoices (id INT);
2126"#;
2127 let plugin = CodeParserPlugin;
2128 let entities = plugin.extract_entities(code, "schema.sql");
2129 let by_name = |n: &str| entities.iter().find(|e| e.name == n);
2130
2131 assert_eq!(by_name("users").map(|e| e.entity_type.as_str()), Some("table"));
2133 assert_eq!(by_name("active_users").map(|e| e.entity_type.as_str()), Some("view"));
2134 assert_eq!(by_name("add").map(|e| e.entity_type.as_str()), Some("function"));
2135 assert_eq!(by_name("mood").map(|e| e.entity_type.as_str()), Some("type"));
2136 assert_eq!(by_name("mv").map(|e| e.entity_type.as_str()), Some("view"));
2137 assert_eq!(
2138 by_name("billing.invoices").map(|e| e.entity_type.as_str()),
2139 Some("table"),
2140 "schema-qualified table name should be preserved"
2141 );
2142
2143 assert_eq!(
2145 by_name("idx_name").map(|e| e.entity_type.as_str()),
2146 Some("index"),
2147 "index should be named idx_name, not the table it indexes"
2148 );
2149 assert_eq!(by_name("myapp").map(|e| e.entity_type.as_str()), Some("schema"));
2150 }
2151
2152 #[test]
2153 fn test_dart_top_level_function_includes_body() {
2154 let code = r#"
2155int add(int a, int b) {
2156 return a + b;
2157}
2158
2159String greet(String name) => 'Hello, $name!';
2160"#;
2161 let plugin = CodeParserPlugin;
2162 let entities = plugin.extract_entities(code, "funcs.dart");
2163 eprintln!(
2164 "Dart top-level: {:?}",
2165 entities
2166 .iter()
2167 .map(|e| (&e.name, &e.entity_type, &e.content))
2168 .collect::<Vec<_>>()
2169 );
2170
2171 let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
2172 assert!(
2173 add_fn.content.contains("return a + b"),
2174 "Top-level function content should include the body, got: {:?}",
2175 add_fn.content
2176 );
2177
2178 let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
2179 assert!(
2180 greet_fn.content.contains("Hello"),
2181 "Expression body should be included, got: {:?}",
2182 greet_fn.content
2183 );
2184
2185 let code_v2 = r#"
2187int add(int a, int b) {
2188 return a * b;
2189}
2190
2191String greet(String name) => 'Hello, $name!';
2192"#;
2193 let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
2194 let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
2195 assert_ne!(
2196 add_fn.content_hash, add_v2.content_hash,
2197 "Body change should produce different content_hash"
2198 );
2199
2200 let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
2202 assert_eq!(
2203 greet_fn.content_hash, greet_v2.content_hash,
2204 "Unchanged function should keep the same content_hash"
2205 );
2206 }
2207
2208 #[test]
2209 fn test_dart_renamed_named_constructor_same_structural_hash() {
2210 let code_a = r#"
2211class Foo {
2212 Foo.fromJson(Map<String, dynamic> json) {
2213 print(json);
2214 }
2215}
2216"#;
2217 let code_b = r#"
2218class Foo {
2219 Foo.fromMap(Map<String, dynamic> json) {
2220 print(json);
2221 }
2222}
2223"#;
2224 let plugin = CodeParserPlugin;
2225 let entities_a = plugin.extract_entities(code_a, "a.dart");
2226 let entities_b = plugin.extract_entities(code_b, "b.dart");
2227
2228 let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
2229 let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
2230
2231 assert_eq!(
2232 ctor_a.structural_hash, ctor_b.structural_hash,
2233 "Renamed named constructor with identical body should have same structural_hash"
2234 );
2235 assert_ne!(
2236 ctor_a.content_hash, ctor_b.content_hash,
2237 "Content hash should differ since raw content includes the name"
2238 );
2239 }
2240
2241 #[test]
2242 fn test_dart_top_level_getter_setter() {
2243 let code = r#"
2244int _value = 0;
2245
2246int get currentValue {
2247 return _value;
2248}
2249
2250set currentValue(int v) {
2251 _value = v;
2252}
2253"#;
2254 let plugin = CodeParserPlugin;
2255 let entities = plugin.extract_entities(code, "accessors.dart");
2256 eprintln!(
2257 "Dart top-level accessors: {:?}",
2258 entities
2259 .iter()
2260 .map(|e| (&e.name, &e.entity_type, &e.content))
2261 .collect::<Vec<_>>()
2262 );
2263
2264 let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
2265 assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
2266 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2267 assert!(
2268 getter.unwrap().content.contains("return _value"),
2269 "Top-level getter content should include the body"
2270 );
2271 assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
2272
2273 let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
2277 assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
2278 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2279 assert!(
2280 setter.unwrap().content.contains("_value = v"),
2281 "Top-level setter content should include the body"
2282 );
2283 }
2284
2285 #[test]
2286 fn test_dart_field_entity_type() {
2287 let code = r#"
2288class Config {
2289 final String name;
2290 static const int maxRetries = 3;
2291}
2292"#;
2293 let plugin = CodeParserPlugin;
2294 let entities = plugin.extract_entities(code, "config.dart");
2295 eprintln!(
2296 "Dart fields: {:?}",
2297 entities
2298 .iter()
2299 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2300 .collect::<Vec<_>>()
2301 );
2302
2303 let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
2304 assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
2305 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2306 assert_eq!(name_field.unwrap().entity_type, "field");
2307
2308 let max_retries = entities.iter().find(|e| e.name == "maxRetries");
2309 assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
2310 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2311 assert_eq!(max_retries.unwrap().entity_type, "field");
2312 }
2313
2314 #[test]
2315 fn test_dart_identifier_list_fields() {
2316 let code = r#"
2320abstract class Shape {
2321 abstract double x, y;
2322 abstract String label;
2323}
2324"#;
2325 let plugin = CodeParserPlugin;
2326 let entities = plugin.extract_entities(code, "shape.dart");
2327 eprintln!(
2328 "Dart identifier_list fields: {:?}",
2329 entities
2330 .iter()
2331 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2332 .collect::<Vec<_>>()
2333 );
2334
2335 let x_field = entities.iter().find(|e| e.name == "x");
2336 assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
2337 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2338 assert_eq!(x_field.unwrap().entity_type, "field");
2339 assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
2340
2341 let label_field = entities.iter().find(|e| e.name == "label");
2342 assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
2343 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2344 assert_eq!(label_field.unwrap().entity_type, "field");
2345 }
2346
2347 #[test]
2348 fn test_ocaml_entity_extraction() {
2349 let code = r#"
2350type color = Red | Green | Blue
2351
2352type point = {
2353 x : float;
2354 y : float;
2355}
2356
2357exception Not_found of string
2358
2359let greet name =
2360 Printf.printf "Hello, %s!\n" name
2361
2362let add a b = a + b
2363
2364let version = "1.0"
2365
2366let color_to_string = function
2367 | Red -> "red"
2368 | Blue -> "blue"
2369
2370let inc = fun x -> x + 1
2371
2372module MyModule = struct
2373 let helper x = x * 2
2374end
2375
2376module type Printable = sig
2377 val to_string : 'a -> string
2378end
2379
2380external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
2381
2382class point_class x_init = object
2383 val mutable x = x_init
2384 method get_x = x
2385end
2386
2387class type measurable = object
2388 method measure : float
2389end
2390"#;
2391 let plugin = CodeParserPlugin;
2392 let entities = plugin.extract_entities(code, "example.ml");
2393 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2394 eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2395
2396 let find = |name: &str| entities.iter().find(|e| e.name == name)
2397 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2398
2399 assert_eq!(find("color").entity_type, "type");
2400 assert_eq!(find("point").entity_type, "type");
2401 assert_eq!(find("Not_found").entity_type, "exception");
2402 assert_eq!(find("greet").entity_type, "function");
2403 assert_eq!(find("add").entity_type, "function");
2404 assert_eq!(find("version").entity_type, "value");
2405 assert_eq!(find("color_to_string").entity_type, "function");
2406 assert_eq!(find("inc").entity_type, "function");
2407 assert_eq!(find("MyModule").entity_type, "module");
2408 assert_eq!(find("Printable").entity_type, "module_type");
2409 assert_eq!(find("caml_input").entity_type, "external");
2410 assert_eq!(find("point_class").entity_type, "class");
2411 assert_eq!(find("measurable").entity_type, "class_type");
2412 }
2413
2414 #[test]
2415 fn test_ocaml_nested_module_entities() {
2416 let code = r#"
2417module Outer = struct
2418 let x = 42
2419
2420 module Inner = struct
2421 let y = 0
2422 end
2423end
2424"#;
2425 let plugin = CodeParserPlugin;
2426 let entities = plugin.extract_entities(code, "nested.ml");
2427 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2428 eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2429
2430 let find = |name: &str| entities.iter().find(|e| e.name == name)
2431 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2432
2433 let outer = find("Outer");
2434 let x = find("x");
2435 let inner = find("Inner");
2436 let y = find("y");
2437
2438 assert_eq!(outer.entity_type, "module");
2439 assert_eq!(x.entity_type, "value");
2440 assert_eq!(inner.entity_type, "module");
2441 assert_eq!(y.entity_type, "value");
2442
2443 assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
2444 assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
2445 assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
2446 }
2447
2448 #[test]
2449 fn test_ocaml_interface_entity_extraction() {
2450 let code = r#"
2451type t
2452
2453val create : string -> t
2454val to_string : t -> string
2455
2456exception Invalid_input of string
2457
2458module type Serializable = sig
2459 val serialize : t -> string
2460end
2461"#;
2462 let plugin = CodeParserPlugin;
2463 let entities = plugin.extract_entities(code, "example.mli");
2464 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2465 eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2466
2467 let find = |name: &str| entities.iter().find(|e| e.name == name)
2468 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2469
2470 assert_eq!(find("t").entity_type, "type");
2471 assert_eq!(find("create").entity_type, "val");
2472 assert_eq!(find("to_string").entity_type, "val");
2473 assert_eq!(find("Invalid_input").entity_type, "exception");
2474 assert_eq!(find("Serializable").entity_type, "module_type");
2475 }
2476
2477 #[test]
2478 fn test_ocaml_mutual_recursion_let() {
2479 let code = r#"
2480let rec even n = (n = 0) || odd (n - 1)
2481and odd n = (n <> 0) && even (n - 1)
2482
2483let rec ping x = pong (x - 1)
2484and pong x = if x <= 0 then 0 else ping (x - 1)
2485"#;
2486 let plugin = CodeParserPlugin;
2487 let entities = plugin.extract_entities(code, "mutual.ml");
2488 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2489 eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2490
2491 let find = |name: &str| entities.iter().find(|e| e.name == name)
2492 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2493
2494 assert_eq!(find("even").entity_type, "function");
2495 assert_eq!(find("odd").entity_type, "function");
2496 assert_eq!(find("ping").entity_type, "function");
2497 assert_eq!(find("pong").entity_type, "function");
2498 }
2499
2500 #[test]
2501 fn test_ocaml_mutual_recursion_module() {
2502 let code = r#"
2503module rec A : sig val x : int end = struct
2504 let x = B.y + 1
2505end
2506and B : sig val y : int end = struct
2507 let y = 0
2508end
2509"#;
2510 let plugin = CodeParserPlugin;
2511 let entities = plugin.extract_entities(code, "mutual_mod.ml");
2512 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2513 eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2514
2515 let find = |name: &str| entities.iter().find(|e| e.name == name)
2516 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2517
2518 let a = find("A");
2519 let b = find("B");
2520 assert_eq!(a.entity_type, "module");
2521 assert_eq!(b.entity_type, "module");
2522
2523 let x = find("x");
2524 let y = find("y");
2525 assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
2526 assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
2527 }
2528
2529 #[test]
2530 fn test_ocaml_destructured_let() {
2531 let code = r#"
2532let (a, b) = (1, 2)
2533
2534let { x; y } = point
2535
2536let simple = 42
2537"#;
2538 let plugin = CodeParserPlugin;
2539 let entities = plugin.extract_entities(code, "destruct.ml");
2540 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2541 eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2542
2543 let find = |name: &str| entities.iter().find(|e| e.name == name)
2544 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2545
2546 assert_eq!(find("a").entity_type, "value");
2547 assert_eq!(find("b").entity_type, "value");
2548 assert_eq!(find("x").entity_type, "value");
2549 assert_eq!(find("y").entity_type, "value");
2550 assert_eq!(find("simple").entity_type, "value");
2551 }
2552
2553 #[test]
2554 fn test_ocaml_mutual_recursion_class() {
2555 let code = r#"
2556class foo = object
2557 method x = 1
2558end
2559and bar = object
2560 method y = 2
2561end
2562"#;
2563 let plugin = CodeParserPlugin;
2564 let entities = plugin.extract_entities(code, "classes.ml");
2565 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2566 eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2567
2568 let find = |name: &str| entities.iter().find(|e| e.name == name)
2569 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2570
2571 assert_eq!(find("foo").entity_type, "class");
2572 assert_eq!(find("bar").entity_type, "class");
2573 }
2574
2575 #[test]
2576 fn test_perl_entity_extraction() {
2577 let code = r#"package Foo::Bar;
2578
2579use strict;
2580use warnings;
2581
2582sub hello {
2583 my ($self, $name) = @_;
2584 print "Hello, $name!\n";
2585}
2586
2587sub _private_helper {
2588 return 42;
2589}
2590
25911;
2592"#;
2593 let plugin = CodeParserPlugin;
2594 let entities = plugin.extract_entities(code, "Foo/Bar.pm");
2595 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2596
2597 assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
2598 assert!(names.contains(&"hello"), "got: {:?}", names);
2599 assert!(names.contains(&"_private_helper"), "got: {:?}", names);
2600
2601 let find = |name: &str| entities.iter().find(|e| e.name == name)
2602 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2603
2604 assert_eq!(find("Foo::Bar").entity_type, "package");
2605 assert_eq!(find("hello").entity_type, "function");
2606 assert_eq!(find("_private_helper").entity_type, "function");
2607 }
2608
2609 #[test]
2610 fn test_fortran_entity_extraction() {
2611 let code = r#"module math_utils
2612 implicit none
2613contains
2614 function add(a, b) result(c)
2615 integer, intent(in) :: a, b
2616 integer :: c
2617 c = a + b
2618 end function add
2619
2620 subroutine greet()
2621 print *, "hello"
2622 end subroutine greet
2623end module math_utils
2624
2625program main
2626 implicit none
2627 print *, "hello"
2628end program main
2629"#;
2630 let plugin = CodeParserPlugin;
2631 let entities = plugin.extract_entities(code, "test.f90");
2632 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2633
2634 assert!(names.contains(&"math_utils"), "got: {:?}", names);
2635 assert!(names.contains(&"add"), "got: {:?}", names);
2636 assert!(names.contains(&"greet"), "got: {:?}", names);
2637 assert!(names.contains(&"main"), "got: {:?}", names);
2638
2639 let find = |name: &str| entities.iter().find(|e| e.name == name)
2640 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2641
2642 assert_eq!(find("math_utils").entity_type, "module");
2643 assert_eq!(find("add").entity_type, "function");
2644 assert_eq!(find("greet").entity_type, "subroutine");
2645 assert_eq!(find("main").entity_type, "program");
2646
2647 assert!(find("add").parent_id.is_some());
2649 assert!(find("greet").parent_id.is_some());
2650 }
2651
2652 #[test]
2653 fn test_scala_entity_extraction() {
2654 let code = r#"
2655package com.example
2656
2657import scala.collection.mutable
2658
2659class UserService(val name: String) {
2660 def getUsers(): List[User] = db.findAll()
2661
2662 def createUser(user: User): Unit = db.save(user)
2663
2664 private def validate(user: User): Boolean = true
2665}
2666
2667object UserService {
2668 def apply(name: String): UserService = new UserService(name)
2669
2670 val DefaultName: String = "default"
2671}
2672
2673trait Repository[T] {
2674 def findById(id: String): Option[T]
2675 def findAll(): List[T]
2676}
2677
2678case class User(id: String, name: String)
2679
2680type UserId = String
2681"#;
2682 let plugin = CodeParserPlugin;
2683 let entities = plugin.extract_entities(code, "UserService.scala");
2684 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2685 eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2686
2687 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
2688 assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
2689 assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
2690 assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
2691
2692 let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
2694 assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
2695 }
2696
2697 #[test]
2698 fn test_scala3_entity_extraction() {
2699 let code = r#"
2700package com.example
2701
2702enum Color:
2703 case Red, Green, Blue
2704
2705enum Planet(mass: Double, radius: Double):
2706 case Mercury extends Planet(3.303e+23, 2.4397e6)
2707 case Venus extends Planet(4.869e+24, 6.0518e6)
2708
2709object Main:
2710 def main(args: Array[String]): Unit =
2711 println("Hello, World!")
2712
2713trait Greeter:
2714 def greet(name: String): String
2715
2716given Greeter with
2717 def greet(name: String): String = s"Hello, $name!"
2718
2719extension (s: String)
2720 def shout: String = s.toUpperCase + "!"
2721
2722type Predicate[A] = A => Boolean
2723"#;
2724 let plugin = CodeParserPlugin;
2725 let entities = plugin.extract_entities(code, "Main.scala");
2726 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2727 eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2728
2729 assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
2730 assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
2731 assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
2732 assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
2733 assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
2734 }
2735
2736 #[test]
2737 fn test_zig_entity_extraction() {
2738 let code = r#"
2739const std = @import("std");
2740
2741pub const Point = struct {
2742 x: i32,
2743 y: i32,
2744};
2745
2746pub const Color = enum {
2747 red,
2748 green,
2749 blue,
2750};
2751
2752const Person = struct {
2753 name: []const u8,
2754 age: u32,
2755};
2756
2757pub fn greet(name: []const u8) void {
2758 std.debug.print("Hello, {s}!\n", .{name});
2759}
2760
2761fn add(a: i32, b: i32) i32 {
2762 return a + b;
2763}
2764
2765pub fn main() !void {
2766 greet("world");
2767}
2768
2769test "basic addition" {
2770 const result = add(2, 3);
2771 _ = result;
2772}
2773"#;
2774 let plugin = CodeParserPlugin;
2775 let entities = plugin.extract_entities(code, "main.zig");
2776 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2777 let types: std::collections::HashMap<&str, &str> = entities
2778 .iter()
2779 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2780 .collect();
2781
2782 assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
2783 assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
2784 assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
2785 assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
2786 assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
2787 assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
2788
2789 assert_eq!(types["greet"], "function");
2790 assert_eq!(types["add"], "function");
2791 assert_eq!(types["Point"], "struct");
2792 assert_eq!(types["Color"], "enum");
2793 assert_eq!(types["Person"], "struct");
2794 }
2795
2796 #[test]
2797 #[cfg(feature = "lang-edn")]
2798 fn test_edn_deps_edn_map_entries() {
2799 let code = r#"{:deps {org.clojure/clojure {:mvn/version "1.11.0"}}
2800 :paths ["src" "resources"]
2801 :aliases {:dev {:extra-deps {cider/cider-nrepl {:mvn/version "0.28.5"}}}}}"#;
2802 let plugin = CodeParserPlugin;
2803 let entities = plugin.extract_entities(code, "deps.edn");
2804 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2805 let types: std::collections::HashMap<&str, &str> = entities
2806 .iter()
2807 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2808 .collect();
2809
2810 assert!(names.contains(&":deps"), "Should find :deps, got: {:?}", names);
2811 assert!(names.contains(&":paths"), "Should find :paths, got: {:?}", names);
2812 assert!(names.contains(&":aliases"), "Should find :aliases, got: {:?}", names);
2813 assert_eq!(names.len(), 3, "Should have exactly 3 entries, got: {:?}", names);
2814 assert_eq!(types[":deps"], "entry");
2815 assert_eq!(types[":paths"], "entry");
2816 assert_eq!(types[":aliases"], "entry");
2817 }
2818
2819 #[test]
2820 #[cfg(feature = "lang-edn")]
2821 fn test_edn_nested_map_values_not_extracted() {
2822 let code = r#"{:a {:b 1 :c 2} :d 3}"#;
2824 let plugin = CodeParserPlugin;
2825 let entities = plugin.extract_entities(code, "config.edn");
2826 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2827
2828 assert!(names.contains(&":a"), "Should find :a, got: {:?}", names);
2829 assert!(names.contains(&":d"), "Should find :d, got: {:?}", names);
2830 assert!(!names.contains(&":b"), "Inner :b should not be extracted");
2831 assert!(!names.contains(&":c"), "Inner :c should not be extracted");
2832 assert_eq!(names.len(), 2);
2833 }
2834
2835 #[test]
2836 #[cfg(feature = "lang-edn")]
2837 fn test_edn_non_map_top_level_forms_not_extracted() {
2838 let code = r#"["alpha" "beta"]"#;
2840 let plugin = CodeParserPlugin;
2841 let entities = plugin.extract_entities(code, "data.edn");
2842 assert_eq!(entities.len(), 0);
2843 }
2844
2845 #[test]
2846 #[cfg(feature = "lang-edn")]
2847 fn test_edn_symbol_keys_extracted() {
2848 let code = r#"{foo 1 bar 2}"#;
2849 let plugin = CodeParserPlugin;
2850 let entities = plugin.extract_entities(code, "sym.edn");
2851 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2852
2853 assert!(names.contains(&"foo"), "Should find foo, got: {:?}", names);
2854 assert!(names.contains(&"bar"), "Should find bar, got: {:?}", names);
2855 }
2856}