1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use crate::utils::hash::{content_hash, structural_hash};
10use languages::{get_all_code_extensions, get_language_config};
11use entity_extractor::extract_entities;
12
/// Semantic parser plugin for source-code files.
///
/// A stateless unit type: all behaviour lives in its [`SemanticParserPlugin`]
/// implementation, which registers under the id `"code"`.
pub struct CodeParserPlugin;
14
15thread_local! {
18 static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
19}
20
21fn language_config_for_content(
22 content: &str,
23 file_path: &str,
24) -> Option<&'static languages::LanguageConfig> {
25 let ext = std::path::Path::new(file_path)
26 .extension()
27 .and_then(|e| e.to_str())
28 .map(|e| format!(".{}", e.to_lowercase()))
29 .unwrap_or_default();
30
31 get_language_config(&ext).or_else(|| {
32 detect_ext_from_content(content).and_then(|shebang_ext| get_language_config(&shebang_ext))
33 })
34}
35
36fn parse_tree(
37 config: &'static languages::LanguageConfig,
38 content: &str,
39) -> Option<tree_sitter::Tree> {
40 let language = (config.get_language)()?;
41
42 PARSER_CACHE.with(|cache| {
43 let mut cache = cache.borrow_mut();
44 let parser = cache.entry(config.id).or_insert_with(|| {
45 let mut p = tree_sitter::Parser::new();
46 let _ = p.set_language(&language);
47 p
48 });
49
50 parser.parse(content.as_bytes(), None)
51 })
52}
53
54fn has_non_comment_content(node: tree_sitter::Node, source: &[u8]) -> bool {
55 let mut worklist = Vec::new();
56 let mut cursor = node.walk();
57 worklist.extend(node.children(&mut cursor));
58
59 while let Some(node) = worklist.pop() {
60 if is_comment_node(node.kind()) {
61 continue;
62 }
63
64 if node.child_count() == 0 {
65 let start = node.start_byte();
66 let end = node.end_byte();
67 if start < end
68 && end <= source.len()
69 && source[start..end].iter().any(|b| !b.is_ascii_whitespace())
70 {
71 return true;
72 }
73 continue;
74 }
75
76 let mut cursor = node.walk();
77 worklist.extend(node.children(&mut cursor));
78 }
79
80 false
81}
82
/// Returns `true` when `kind` is one of the node kinds treated as a comment.
///
/// The comparison is exact and case-sensitive.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];

    COMMENT_KINDS.iter().any(|known| *known == kind)
}
89
/// Returns the text of a leading shebang line, without the `#!` marker and
/// without the line terminator.
///
/// Returns `None` when `content` does not start with `#!`, and also when the
/// first non-whitespace character after `#!` on that line is `[`. The latter
/// is how a Rust inner attribute such as `#![allow(dead_code)]` begins; Rust
/// does not treat such a line as a shebang, and no interpreter path starts
/// with `[`.
///
/// A bare `#!` yields `Some("")`.
fn shebang_line(content: &str) -> Option<&str> {
    let rest = content.strip_prefix("#!")?;
    let line = rest.lines().next().unwrap_or("");

    // `#![...]` / `#! [...]` is an attribute, not an interpreter directive.
    if line.trim_start().starts_with('[') {
        return None;
    }

    Some(line)
}
95
96impl SemanticParserPlugin for CodeParserPlugin {
97 fn id(&self) -> &str {
98 "code"
99 }
100
101 fn extensions(&self) -> &[&str] {
102 get_all_code_extensions()
103 }
104
105 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
106 self.extract_entities_with_tree(content, file_path).0
107 }
108
109 fn extract_entities_with_tree(
110 &self,
111 content: &str,
112 file_path: &str,
113 ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
114 let Some(config) = language_config_for_content(content, file_path) else {
115 return (Vec::new(), None);
116 };
117
118 let Some(tree) = parse_tree(config, content) else {
119 return (Vec::new(), None);
120 };
121
122 let entities = extract_entities(&tree, file_path, config, content);
123 (entities, Some(tree))
124 }
125
126 fn structural_hash_content(&self, content: &str, file_path: &str) -> Option<String> {
127 let config = language_config_for_content(content, file_path)?;
128 let tree = parse_tree(config, content)?;
129 let shebang = shebang_line(content);
130 if shebang.is_none() && !has_non_comment_content(tree.root_node(), content.as_bytes()) {
131 return Some(String::new());
132 }
133 let structural = structural_hash(tree.root_node(), content.as_bytes());
134 match shebang {
135 Some(shebang) => Some(content_hash(&format!("shebang:{shebang}\n{structural}"))),
136 None => Some(structural),
137 }
138 }
139}
140
141use crate::parser::registry::detect_ext_from_content;
142
143#[cfg(test)]
144mod tests {
145 use super::*;
146
// Java: the class, the generic interface and the enum declared in the fixture
// must each be reported by name. The extracted (name, type) pairs are also
// printed to stderr for debugging.
147 #[test]
148 fn test_java_entity_extraction() {
149 let code = r#"
150package com.example;
151
152import java.util.List;
153
154public class UserService {
155 private String name;
156
157 public UserService(String name) {
158 this.name = name;
159 }
160
161 public List<User> getUsers() {
162 return db.findAll();
163 }
164
165 public void createUser(User user) {
166 db.save(user);
167 }
168}
169
170interface Repository<T> {
171 T findById(String id);
172 List<T> findAll();
173}
174
175enum Status {
176 ACTIVE,
177 INACTIVE,
178 DELETED
179}
180"#;
181 let plugin = CodeParserPlugin;
182 let entities = plugin.extract_entities(code, "UserService.java");
183 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
184 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
185 eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
186
187 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
188 assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
189 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
190 }
191
// Java: methods declared inside a class must be extracted alongside the class,
// and the `add` method must carry a parent id.
192 #[test]
193 fn test_java_nested_methods() {
194 let code = r#"
195public class Calculator {
196 public int add(int a, int b) {
197 return a + b;
198 }
199
200 public int subtract(int a, int b) {
201 return a - b;
202 }
203}
204"#;
205 let plugin = CodeParserPlugin;
206 let entities = plugin.extract_entities(code, "Calculator.java");
207 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
208 eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
209
210 assert!(names.contains(&"Calculator"), "Should find Calculator class");
211 assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
212 assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
213
214 let add = entities.iter().find(|e| e.name == "add").unwrap();
216 assert!(add.parent_id.is_some(), "add should have parent_id");
217 }
218
// C: the three functions, the named struct and the named enum must be reported.
// The anonymous `typedef struct { ... } Person` is in the fixture but is not
// asserted on.
219 #[test]
220 fn test_c_entity_extraction() {
221 let code = r#"
222#include <stdio.h>
223
224struct Point {
225 int x;
226 int y;
227};
228
229enum Color {
230 RED,
231 GREEN,
232 BLUE
233};
234
235typedef struct {
236 char name[50];
237 int age;
238} Person;
239
240void greet(const char* name) {
241 printf("Hello, %s!\n", name);
242}
243
244int add(int a, int b) {
245 return a + b;
246}
247
248int main() {
249 greet("world");
250 return 0;
251}
252"#;
253 let plugin = CodeParserPlugin;
254 let entities = plugin.extract_entities(code, "main.c");
255 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
256 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
257 eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
258
259 assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
260 assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
261 assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
262 assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
263 assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
264 }
265
// C: file-scope declarations (a global variable, a prototype, a function) are
// entities; variables declared inside a function body are not.
266 #[test]
267 fn test_c_function_locals_not_extracted() {
268 let code = r#"
269int global_count = 0;
270int helper(void);
271
272int main(void) {
273 int local = helper();
274 const char *message = "hello";
275 return local + global_count;
276}
277"#;
278 let plugin = CodeParserPlugin;
279 let entities = plugin.extract_entities(code, "main.c");
280 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
281
282 assert!(names.contains(&"global_count"), "got: {:?}", names);
283 assert!(names.contains(&"helper"), "got: {:?}", names);
284 assert!(names.contains(&"main"), "got: {:?}", names);
285 assert!(!names.contains(&"local"), "got: {:?}", names);
286 assert!(!names.contains(&"message"), "got: {:?}", names);
287 }
288
// C++: the namespace, the class nested inside it and the free function must
// all be reported by name.
#[test]
fn test_cpp_entity_extraction() {
    let source = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
    let found = CodeParserPlugin.extract_entities(source, "main.cpp");
    let names: Vec<&str> = found.iter().map(|entity| entity.name.as_str()).collect();

    for expected in ["math", "Vector3", "greet"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
299
// C++: file-scope declarations are entities; locals inside a function body,
// including a lambda bound to a local and the lambda's own locals, are not.
300 #[test]
301 fn test_cpp_function_locals_not_extracted() {
302 let code = r#"
303int global_value = 1;
304int helper();
305
306int main() {
307 int local = helper();
308 auto lambda = []() {
309 int lambda_local = 3;
310 return lambda_local;
311 };
312 return local + lambda();
313}
314"#;
315 let plugin = CodeParserPlugin;
316 let entities = plugin.extract_entities(code, "main.cpp");
317 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
318
319 assert!(names.contains(&"global_value"), "got: {:?}", names);
320 assert!(names.contains(&"helper"), "got: {:?}", names);
321 assert!(names.contains(&"main"), "got: {:?}", names);
322 assert!(!names.contains(&"local"), "got: {:?}", names);
323 assert!(!names.contains(&"lambda"), "got: {:?}", names);
324 assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
325 }
326
// Ruby: the module, the class nested inside it and the top-level method must
// all be reported by name.
#[test]
fn test_ruby_entity_extraction() {
    let source = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
    let found = CodeParserPlugin.extract_entities(source, "auth.rb");
    let names: Vec<&str> = found.iter().map(|entity| entity.name.as_str()).collect();

    for expected in ["Auth", "User", "helper"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
337
// C#: the namespace plus the class and enum declared inside it must all be
// reported by name.
#[test]
fn test_csharp_entity_extraction() {
    let source = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
    let found = CodeParserPlugin.extract_entities(source, "Models.cs");
    let names: Vec<&str> = found.iter().map(|entity| entity.name.as_str()).collect();

    for expected in ["MyApp", "User", "Role"] {
        assert!(names.contains(&expected), "got: {:?}", names);
    }
}
348
// Swift: broad extraction check. Every declaration kind in the fixture
// (typealias, custom operator, class, struct, enum, protocol, associatedtype,
// init, deinit, subscript, free function) must be found by name; the second
// half pins the entity type, parent id and full id of selected entities.
349 #[test]
350 fn test_swift_entity_extraction() {
351 let code = r#"
352import Foundation
353
354typealias Handler = (Int) -> Void
355
356prefix operator ~~~
357
358class UserService {
359 var name: String
360
361 init(name: String) {
362 self.name = name
363 }
364
365 deinit {
366 print("freed")
367 }
368
369 func getUsers() -> [User] {
370 return db.findAll()
371 }
372}
373
374struct Point {
375 var x: Double
376 var y: Double
377
378 subscript(index: Int) -> Double {
379 return x + y + Double(index)
380 }
381}
382
383enum Status {
384 case active
385 case inactive
386 case deleted
387}
388
389protocol Repository {
390 associatedtype Canvas
391 func findById(id: String) -> Canvas?
392 func findAll() -> [Canvas]
393}
394
395func helper(x: Int) -> Int {
396 return x * 2
397}
398"#;
399 let plugin = CodeParserPlugin;
400 let entities = plugin.extract_entities(code, "UserService.swift");
401 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
402 eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
403
404 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
405 assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
406 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
407 assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
408 assert!(names.contains(&"Canvas"), "Should find associatedtype Canvas, got: {:?}", names);
409 assert!(names.contains(&"Handler"), "Should find typealias Handler, got: {:?}", names);
410 assert!(names.contains(&"~~~"), "Should find custom operator ~~~, got: {:?}", names);
411 assert!(names.contains(&"init"), "Should find initializer init, got: {:?}", names);
412 assert!(names.contains(&"deinit"), "Should find deinitializer deinit, got: {:?}", names);
413 assert!(names.contains(&"subscript"), "Should find subscript, got: {:?}", names);
414 assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
415
416 let handler = entities.iter().find(|e| e.name == "Handler").unwrap();
417 assert_eq!(handler.entity_type, "type");
418 assert!(handler.parent_id.is_none());
419
420 let operator = entities.iter().find(|e| e.name == "~~~").unwrap();
421 assert_eq!(operator.entity_type, "operator");
422 assert!(operator.parent_id.is_none());
423
424 let user_service = entities.iter().find(|e| e.name == "UserService").unwrap();
425 assert_eq!(user_service.entity_type, "class");
426
427 let initializer = entities.iter().find(|e| e.name == "init").unwrap();
428 assert_eq!(initializer.entity_type, "init");
429 assert_eq!(initializer.parent_id.as_deref(), Some(user_service.id.as_str()));
430 assert_eq!(initializer.id, "UserService.swift::class::UserService::init");
431
432 let deinitializer = entities.iter().find(|e| e.name == "deinit").unwrap();
433 assert_eq!(deinitializer.entity_type, "deinit");
434 assert_eq!(deinitializer.parent_id.as_deref(), Some(user_service.id.as_str()));
435 assert_eq!(
436 deinitializer.id,
437 "UserService.swift::class::UserService::deinit"
438 );
439
440 let point = entities.iter().find(|e| e.name == "Point").unwrap();
441 assert_eq!(point.entity_type, "struct");
442
443 let subscript = entities.iter().find(|e| e.name == "subscript").unwrap();
444 assert_eq!(subscript.entity_type, "subscript");
445 assert_eq!(subscript.parent_id.as_deref(), Some(point.id.as_str()));
446 assert_eq!(
447 subscript.id,
448 "UserService.swift::struct::Point::subscript"
449 );
450
451 let status = entities.iter().find(|e| e.name == "Status").unwrap();
452 assert_eq!(status.entity_type, "enum");
453
454 let repository = entities.iter().find(|e| e.name == "Repository").unwrap();
455 assert_eq!(repository.entity_type, "protocol");
456 assert_eq!(repository.id, "UserService.swift::protocol::Repository");
457
458 let canvas = entities.iter().find(|e| e.name == "Canvas").unwrap();
459 assert_eq!(canvas.entity_type, "associatedtype");
460 assert_eq!(canvas.parent_id.as_deref(), Some(repository.id.as_str()));
461 assert_eq!(
462 canvas.id,
463 "UserService.swift::protocol::Repository::Canvas"
464 );
465 }
466
// Swift: `var x, y: Int` must produce one property entity per binding, in
// source order, each parented to the struct and each with per-binding content
// (`var x: Int`, `var y: Int`).
467 #[test]
468 fn test_swift_multi_binding_property_extraction() {
469 let code = r#"
470struct Point {
471 var x, y: Int
472}
473"#;
474 let plugin = CodeParserPlugin;
475 let entities = plugin.extract_entities(code, "Point.swift");
476 let point = entities.iter().find(|e| e.name == "Point").unwrap();
477 let properties: Vec<_> = entities
478 .iter()
479 .filter(|e| e.entity_type == "property")
480 .collect();
481
482 assert_eq!(
483 properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
484 vec!["x", "y"]
485 );
486 assert!(properties
487 .iter()
488 .all(|property| property.parent_id.as_deref() == Some(point.id.as_str())));
489 assert_eq!(properties[0].content, "var x: Int");
490 assert_eq!(properties[1].content, "var y: Int");
491 }
492
// Swift: the `content` of each property from a multi-binding declaration must
// describe only that binding. Six fixtures are checked in turn: individually
// typed bindings, a mix of shared and individual types, a generic type that
// contains a comma, initializer expressions, `let` constants sharing a type,
// and two declarations separated by a semicolon on one line.
493 #[test]
494 fn test_swift_multi_binding_property_content_is_per_binding() {
495 let typed_code = r#"
496struct Types {
497 var x: Int, y: String
498}
499"#;
500 let plugin = CodeParserPlugin;
501 let typed_entities = plugin.extract_entities(typed_code, "Types.swift");
502 let typed_properties: Vec<_> = typed_entities
503 .iter()
504 .filter(|e| e.entity_type == "property")
505 .collect();
506 assert_eq!(typed_properties[0].content, "var x: Int");
507 assert_eq!(typed_properties[1].content, "var y: String");
508
509 let mixed_code = r#"
510struct Mixed {
511 var x, y: Int, z: String
512}
513"#;
514 let mixed_entities = plugin.extract_entities(mixed_code, "Mixed.swift");
515 let mixed_properties: Vec<_> = mixed_entities
516 .iter()
517 .filter(|e| e.entity_type == "property")
518 .collect();
519 assert_eq!(mixed_properties[0].content, "var x: Int");
520 assert_eq!(mixed_properties[1].content, "var y: Int");
521 assert_eq!(mixed_properties[2].content, "var z: String");
522
523 let generic_code = r#"
524struct GenericTypes {
525 var lookup: Dictionary<String, Int>, count: Int
526}
527"#;
528 let generic_entities = plugin.extract_entities(generic_code, "GenericTypes.swift");
529 let generic_properties: Vec<_> = generic_entities
530 .iter()
531 .filter(|e| e.entity_type == "property")
532 .collect();
533 assert_eq!(
534 generic_properties[0].content,
535 "var lookup: Dictionary<String, Int>"
536 );
537 assert_eq!(generic_properties[1].content, "var count: Int");
538
539 let initializer_code = r#"
540struct Initializers {
541 var a = Foo(), b = Bar()
542}
543"#;
544 let initializer_entities = plugin.extract_entities(initializer_code, "Initializers.swift");
545 let initializer_properties: Vec<_> = initializer_entities
546 .iter()
547 .filter(|e| e.entity_type == "property")
548 .collect();
549 assert!(initializer_properties[0].content.contains("Foo()"));
550 assert!(!initializer_properties[0].content.contains("Bar()"));
551 assert!(initializer_properties[1].content.contains("Bar()"));
552 assert!(!initializer_properties[1].content.contains("Foo()"));
553
554 let constants_code = r#"
555struct Constants {
556 let first, second, third: Int
557}
558"#;
559 let constants_entities = plugin.extract_entities(constants_code, "Constants.swift");
560 let constants_properties: Vec<_> = constants_entities
561 .iter()
562 .filter(|e| e.entity_type == "property")
563 .collect();
564 assert_eq!(
565 constants_properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
566 vec!["first", "second", "third"]
567 );
568 assert_eq!(constants_properties[0].content, "let first: Int");
569 assert_eq!(constants_properties[1].content, "let second: Int");
570 assert_eq!(constants_properties[2].content, "let third: Int");
571
572 let semicolon_code = r#"
573struct Semicolons {
574 var left, right: Int; var next: Int
575}
576"#;
577 let semicolon_entities = plugin.extract_entities(semicolon_code, "Semicolons.swift");
578 let semicolon_properties: Vec<_> = semicolon_entities
579 .iter()
580 .filter(|e| e.entity_type == "property")
581 .collect();
582 assert_eq!(semicolon_properties[0].content, "var left: Int");
583 assert_eq!(semicolon_properties[1].content, "var right: Int");
584 assert_eq!(semicolon_properties[2].content, "var next: Int");
585 }
586
// Swift: `let`/`var` bindings inside executable bodies (computed properties,
// explicit getters, init, methods, nested functions, subscripts, deinit) must
// not become entities, while the members themselves and the functions nested
// inside those bodies must still be found. The type name `Int` must not show
// up as an entity either.
588 #[test]
589 fn test_swift_body_locals_not_extracted_as_properties() {
590 let code = r#"
591class Cache {
592 var stored: Int
593
594 var computed: Int {
595 let computedLocal = stored + 1
596 func computedNested() -> Int {
597 return computedLocal
598 }
599 return computedNested()
600 }
601
602 var explicit: Int {
603 get {
604 let getterLocal = stored
605 func getterNested() -> Int {
606 return getterLocal
607 }
608 return getterNested()
609 }
610 }
611
612 init(seed: Int) {
613 let initial = seed
614 self.stored = initial
615 }
616
617 func value() -> Int {
618 let doubled = stored * 2
619 var offset = doubled + 1
620 func nested() -> Int {
621 let insideNested = offset
622 return insideNested
623 }
624 return nested()
625 }
626
627 subscript(index: Int) -> Int {
628 let shifted = index + stored
629 func subscriptNested() -> Int {
630 return shifted
631 }
632 return subscriptNested()
633 }
634
635 deinit {
636 let closing = stored
637 _ = closing
638 }
639}
640"#;
641 let plugin = CodeParserPlugin;
642 let entities = plugin.extract_entities(code, "Cache.swift");
643 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
644
645 assert!(names.contains(&"Cache"), "got: {:?}", names);
646 assert!(names.contains(&"stored"), "got: {:?}", names);
647 assert!(names.contains(&"computed"), "got: {:?}", names);
648 assert!(names.contains(&"explicit"), "got: {:?}", names);
649 assert!(names.contains(&"init"), "got: {:?}", names);
650 assert!(names.contains(&"value"), "got: {:?}", names);
651 assert!(names.contains(&"computedNested"), "got: {:?}", names);
652 assert!(names.contains(&"getterNested"), "got: {:?}", names);
653 assert!(names.contains(&"nested"), "got: {:?}", names);
654 assert!(names.contains(&"subscriptNested"), "got: {:?}", names);
655 assert!(names.contains(&"subscript"), "got: {:?}", names);
656 assert!(names.contains(&"deinit"), "got: {:?}", names);
657 assert!(!names.contains(&"Int"), "got: {:?}", names);
658
659 for local in [
660 "computedLocal",
661 "getterLocal",
662 "initial",
663 "doubled",
664 "offset",
665 "insideNested",
666 "shifted",
667 "closing",
668 ] {
669 assert!(!names.contains(&local), "{local} should not be an entity. Got: {:?}", names);
670 }
671 }
671
// Swift: a local multi-binding `let a = ..., b = ...` inside a function is not
// reported itself, but functions declared inside each binding's closure
// initializer must still be discovered.
672 #[test]
673 fn test_swift_suppressed_multi_binding_initializers_are_traversed() {
674 let code = r#"
675func outer() {
676 let a = { func innerA() -> Int { 1 } },
677 b = { func innerB() -> Int { 2 } }
678}
679"#;
680 let plugin = CodeParserPlugin;
681 let entities = plugin.extract_entities(code, "Locals.swift");
682 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
683
684 assert!(names.contains(&"outer"), "got: {:?}", names);
685 assert!(names.contains(&"innerA"), "got: {:?}", names);
686 assert!(names.contains(&"innerB"), "got: {:?}", names);
687 assert!(!names.contains(&"a"), "local binding should stay suppressed: {:?}", names);
688 assert!(!names.contains(&"b"), "local binding should stay suppressed: {:?}", names);
689 }
690
// Swift: a struct whose body contains `#if` / `#else` / `#endif` (plus braces
// hidden in strings and comments) must still be recovered as a top-level
// struct. Its members must be parented to it, both conditional variants of
// `groupedSubcommands` must be reported, and renaming the struct must leave
// its structural hash unchanged.
691 #[test]
692 fn test_swift_conditional_compilation_inside_struct() {
693 let code = r#"
694import ArgumentParser
695
696public struct TuistCommand: AsyncParsableCommand {
697 public init() {}
698
699 public static var configuration: CommandConfiguration {
700 let comment = "brace in string }"
701 let multiline = """
702 brace in multiline }
703 escaped \"""
704 """
705 /* brace in comment } */
706 CommandConfiguration(commandName: "tuist")
707 }
708
709 #if os(macOS)
710 public static var groupedSubcommands: [ParsableCommand.Type] {
711 [InstallCommand.self]
712 }
713 #else
714 public static var groupedSubcommands: [ParsableCommand.Type] {
715 []
716 }
717 #endif
718
719 public func run() async throws {}
720}
721"#;
722 let plugin = CodeParserPlugin;
723 let entities = plugin.extract_entities(code, "TuistCommand.swift");
724 eprintln!(
725 "Swift conditional entities: {:?}",
726 entities
727 .iter()
728 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
729 .collect::<Vec<_>>()
730 );
731
732 let command = entities
733 .iter()
734 .find(|e| e.name == "TuistCommand")
735 .expect("Should recover TuistCommand struct");
736 assert_eq!(command.entity_type, "struct");
737 assert!(command.parent_id.is_none());
738
739 let renamed_code = code.replace("TuistCommand", "RenamedCommand");
740 let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
741 let renamed_command = renamed_entities
742 .iter()
743 .find(|e| e.name == "RenamedCommand")
744 .expect("Should recover renamed command struct");
745 assert_eq!(command.structural_hash, renamed_command.structural_hash);
746
747 for member in ["init", "configuration", "run"] {
748 let entity = entities
749 .iter()
750 .find(|e| e.name == member)
751 .unwrap_or_else(|| panic!("Should find {member}"));
752 assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
753 }
754
755 let grouped_subcommands: Vec<_> = entities
756 .iter()
757 .filter(|e| e.name == "groupedSubcommands")
758 .collect();
759 assert_eq!(grouped_subcommands.len(), 2);
760 assert!(grouped_subcommands
761 .iter()
762 .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
763 }
764
// Swift: string interpolations that contain brace characters, placed before an
// `#if` block, must not break recovery of the enclosing class. Four variants
// are exercised (plain, raw `#"..."#`, multi-line, closure inside the
// interpolation). For each, the class must be top-level and `tpl`, `dump` and
// `render` must be parented to it.
765 #[test]
766 fn test_swift_conditional_compilation_with_interpolated_brace_string() {
767 let plugin = CodeParserPlugin;
768 for (container_name, code) in [
769 (
770 "Config",
771 r#"
772class Config {
773 let tpl = "prefix \("}") suffix"
774#if DEBUG
775 func dump() { print(tpl) }
776#endif
777 func render() -> String { return tpl }
778}
779
780struct Tail { let q: Int }
781"#,
782 ),
783 (
784 "RawConfig",
785 r##"
786class RawConfig {
787 let tpl = #"prefix \#("{") suffix"#
788#if DEBUG
789 func dump() { print(tpl) }
790#endif
791 func render() -> String { return tpl }
792}
793"##,
794 ),
795 (
796 "MultilineConfig",
797 r#"
798class MultilineConfig {
799 let tpl = """
800 prefix \("}") suffix
801 """
802#if DEBUG
803 func dump() { print(tpl) }
804#endif
805 func render() -> String { return tpl }
806}
807"#,
808 ),
809 (
810 "ClosureConfig",
811 r#"
812class ClosureConfig {
813 let tpl = "prefix \(["}"].map { $0 }.joined()) suffix"
814#if DEBUG
815 func dump() { print(tpl) }
816#endif
817 func render() -> String { return tpl }
818}
819"#,
820 ),
821 ] {
822 let file_path = format!("{container_name}.swift");
823 let entities = plugin.extract_entities(code, &file_path);
824 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
825 let container = entities
826 .iter()
827 .find(|e| e.name == container_name)
828 .unwrap_or_else(|| {
829 panic!("Should recover {container_name}, got: {names:?}");
830 });
831 assert_eq!(container.entity_type, "class");
832 assert!(container.parent_id.is_none());
833
834 for member in ["tpl", "dump", "render"] {
835 let entity = entities
836 .iter()
837 .find(|e| e.name == member)
838 .unwrap_or_else(|| {
839 panic!("Should find {member} in {container_name}, got: {names:?}");
840 });
841 assert_eq!(entity.parent_id.as_deref(), Some(container.id.as_str()));
842 }
843 }
844 }
845
// Elixir: the module, `def`, `defp`, `defmacro` and `defprotocol` definitions
// must be reported by name, and `create_user` must carry a parent id. The
// fixture also contains `defguard` and `defimpl`, which are not asserted on.
846 #[test]
847 fn test_elixir_entity_extraction() {
848 let code = r#"
849defmodule MyApp.Accounts do
850 def create_user(attrs) do
851 %User{}
852 |> User.changeset(attrs)
853 |> Repo.insert()
854 end
855
856 defp validate(attrs) do
857 # private helper
858 :ok
859 end
860
861 defmacro is_admin(user) do
862 quote do
863 unquote(user).role == :admin
864 end
865 end
866
867 defguard is_positive(x) when is_integer(x) and x > 0
868end
869
870defprotocol Printable do
871 def to_string(data)
872end
873
874defimpl Printable, for: Integer do
875 def to_string(i), do: Integer.to_string(i)
876end
877"#;
878 let plugin = CodeParserPlugin;
879 let entities = plugin.extract_entities(code, "accounts.ex");
880 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
881 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
882 eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
883
884 assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
885 assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
886 assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
887 assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
888 assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
889
890 let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
892 assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
893 }
894
// Bash: both function syntaxes (`name() { ... }` and `function name { ... }`)
// must be extracted, and nothing else in the script (two entities in total).
895 #[test]
896 fn test_bash_entity_extraction() {
897 let code = r#"#!/bin/bash
898
899greet() {
900 echo "Hello, $1!"
901}
902
903function deploy {
904 echo "deploying..."
905}
906
907# not a function
908echo "main script"
909"#;
910 let plugin = CodeParserPlugin;
911 let entities = plugin.extract_entities(code, "deploy.sh");
912 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
913 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
914 eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
915
916 assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
917 assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
918 assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
919 }
920
// TypeScript (`.ts`): an exported function and an exported class must both be
// reported by name.
921 #[test]
922 fn test_typescript_entity_extraction() {
923 let code = r#"
925export function hello(): string {
926 return "hello";
927}
928
929export class Greeter {
930 greet(name: string): string {
931 return `Hello, ${name}!`;
932 }
933}
934"#;
935 let plugin = CodeParserPlugin;
936 let entities = plugin.extract_entities(code, "test.ts");
937 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
938 assert!(names.contains(&"hello"), "Should find hello function");
939 assert!(names.contains(&"Greeter"), "Should find Greeter class");
940 }
941
// TypeScript: two functions with the same name declared on the same line must
// both be reported, with ids disambiguated by a `@L<line>#<n>` suffix.
#[test]
fn test_same_line_typescript_overload_ids_are_unique() {
    let source = "function f(a: number): void {}; function f(a: string): void {}\n";
    let entities = CodeParserPlugin.extract_entities(source, "over.ts");

    let overload_ids: Vec<&str> = entities
        .iter()
        .filter(|candidate| candidate.name == "f" && candidate.entity_type == "function")
        .map(|overload| overload.id.as_str())
        .collect();

    assert_eq!(overload_ids.len(), 2, "expected both overloads, got: {entities:?}");
    assert_eq!(
        overload_ids,
        vec!["over.ts::function::f@L1#1", "over.ts::function::f@L1#2"]
    );
}
956
// TypeScript: when two same-named classes sit on one line, each gets a
// disambiguated id, and each class's method must use *its own* class id both
// as `parent_id` and as the prefix of its own id.
#[test]
fn test_same_line_duplicate_parent_ids_are_propagated_to_children() {
    let source = "class C { m(){ return 1 } } class C { m(){ return 2 } }\n";
    let entities = CodeParserPlugin.extract_entities(source, "c.ts");

    let mut classes: Vec<&SemanticEntity> = Vec::new();
    let mut methods: Vec<&SemanticEntity> = Vec::new();
    for entity in &entities {
        if entity.name == "C" && entity.entity_type == "class" {
            classes.push(entity);
        }
        if entity.name == "m" && entity.entity_type == "method" {
            methods.push(entity);
        }
    }

    assert_eq!(classes.len(), 2, "expected both classes, got: {entities:?}");
    assert_eq!(methods.len(), 2, "expected both methods, got: {entities:?}");

    let first_class_id = "c.ts::class::C@L1#1";
    let second_class_id = "c.ts::class::C@L1#2";

    assert_eq!(classes[0].id, first_class_id);
    assert_eq!(classes[1].id, second_class_id);
    assert_eq!(methods[0].parent_id.as_deref(), Some(first_class_id));
    assert_eq!(methods[1].parent_id.as_deref(), Some(second_class_id));
    assert_eq!(methods[0].id, "c.ts::class::C@L1#1::m");
    assert_eq!(methods[1].id, "c.ts::class::C@L1#2::m");
}
980
// TypeScript ES-module extension (`.mts`): an exported function must be found.
981 #[test]
982 fn test_module_typescript_entity_extraction() {
983 let code = r#"
984export function hello(): string {
985 return "hello";
986}
987"#;
988 let plugin = CodeParserPlugin;
989 let entities = plugin.extract_entities(code, "test.mts");
990 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
991
992 assert!(names.contains(&"hello"), "Should find hello function");
993 }
994
// TypeScript CommonJS extension (`.cts`): an exported class and its method
// must both be found.
995 #[test]
996 fn test_commonjs_typescript_entity_extraction() {
997 let code = r#"
998export class Greeter {
999 greet(name: string): string {
1000 return `Hello, ${name}!`;
1001 }
1002}
1003"#;
1004 let plugin = CodeParserPlugin;
1005 let entities = plugin.extract_entities(code, "test.cts");
1006 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1007
1008 assert!(names.contains(&"Greeter"), "Should find Greeter class");
1009 assert!(names.contains(&"greet"), "Should find greet method");
1010 }
1011
// TypeScript: an exported `async function*` generator must be reported, with
// entity type `function`.
1012 #[test]
1013 fn test_typescript_generator_function_entity_extraction() {
1014 let code = r#"
1015export async function* streamUsers(): AsyncGenerator<string> {
1016 yield "alice";
1017}
1018"#;
1019 let plugin = CodeParserPlugin;
1020 let entities = plugin.extract_entities(code, "stream.ts");
1021 let stream = entities.iter().find(|e| e.name == "streamUsers");
1022
1023 assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1024 assert_eq!(stream.unwrap().entity_type, "function");
1025 }
1026
1027 #[test]
1028 fn test_javascript_generator_function_entity_extraction() {
1029 let code = r#"
1030export function* ids() {
1031 yield 1;
1032 yield 2;
1033}
1034"#;
1035 let plugin = CodeParserPlugin;
1036 let entities = plugin.extract_entities(code, "ids.js");
1037 let ids = entities.iter().find(|e| e.name == "ids");
1038
1039 assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1040 assert_eq!(ids.unwrap().entity_type, "function");
1041 }
1042
1043 #[test]
1044 fn test_nested_functions_typescript() {
1045 let code = r#"
1046function outer() {
1047 function inner() {
1048 return 42;
1049 }
1050 return inner();
1051}
1052"#;
1053 let plugin = CodeParserPlugin;
1054 let entities = plugin.extract_entities(code, "nested.ts");
1055 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1056 eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1057
1058 assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
1059 assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
1060
1061 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1062 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1063 }
1064
1065 #[test]
1066 fn test_typescript_nested_anonymous_class_fields() {
1067 let code = r#"
1068class L1 {
1069 L2 = class {
1070 L3 = class {
1071 L4 = class {
1072 method() { return 1; }
1073 };
1074 };
1075 };
1076}
1077"#;
1078 let plugin = CodeParserPlugin;
1079 let entities = plugin.extract_entities(code, "a.ts");
1080 let find = |name: &str| {
1081 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1082 panic!(
1083 "missing {name}; got: {:?}",
1084 entities
1085 .iter()
1086 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1087 .collect::<Vec<_>>()
1088 )
1089 })
1090 };
1091
1092 let l1 = find("L1");
1093 assert_eq!(l1.entity_type, "class");
1094 let l1_id = l1.id.clone();
1095
1096 let l2 = find("L2");
1097 assert_eq!(l2.entity_type, "field");
1098 assert_eq!(l2.parent_id.as_deref(), Some(l1_id.as_str()));
1099 let l2_id = l2.id.clone();
1100
1101 let l3 = find("L3");
1102 assert_eq!(l3.entity_type, "field");
1103 assert_eq!(l3.parent_id.as_deref(), Some(l2_id.as_str()));
1104 let l3_id = l3.id.clone();
1105
1106 let l4 = find("L4");
1107 assert_eq!(l4.entity_type, "field");
1108 assert_eq!(l4.parent_id.as_deref(), Some(l3_id.as_str()));
1109 let l4_id = l4.id.clone();
1110
1111 let method = find("method");
1112 assert_eq!(method.entity_type, "method");
1113 assert_eq!(method.parent_id.as_deref(), Some(l4_id.as_str()));
1114 assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
1115 }
1116
1117 #[test]
1118 fn test_nested_functions_python() {
1119 let code = "def outer():\n def inner():\n return 42\n return inner()\n";
1120 let plugin = CodeParserPlugin;
1121 let entities = plugin.extract_entities(code, "nested.py");
1122 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1123
1124 assert!(names.contains(&"outer"), "got: {:?}", names);
1125 assert!(names.contains(&"inner"), "got: {:?}", names);
1126
1127 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1128 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1129 }
1130
1131 #[test]
1132 fn test_nested_functions_rust() {
1133 let code = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
1134 let plugin = CodeParserPlugin;
1135 let entities = plugin.extract_entities(code, "nested.rs");
1136 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1137
1138 assert!(names.contains(&"outer"), "got: {:?}", names);
1139 assert!(names.contains(&"inner"), "got: {:?}", names);
1140
1141 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1142 assert!(inner.parent_id.is_some(), "inner should have parent_id");
1143 }
1144
1145 #[test]
1146 fn test_rust_impl_blocks_unique_names() {
1147 let code = r#"
1148trait Greeting {
1149 fn greet(&self) -> String;
1150}
1151
1152struct Person;
1153struct Robot;
1154struct Cat;
1155
1156impl Greeting for Person {
1157 fn greet(&self) -> String { "Hello".to_string() }
1158}
1159
1160impl Greeting for Robot {
1161 fn greet(&self) -> String { "Beep".to_string() }
1162}
1163
1164impl Greeting for Cat {
1165 fn greet(&self) -> String { "Meow".to_string() }
1166}
1167"#;
1168 let plugin = CodeParserPlugin;
1169 let entities = plugin.extract_entities(code, "impls.rs");
1170 let impl_entities: Vec<&_> = entities.iter()
1171 .filter(|e| e.entity_type == "impl")
1172 .collect();
1173 let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
1174
1175 assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
1176 assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
1177 assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
1178 assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
1179 }
1180
1181 #[test]
1182 fn test_nested_functions_go() {
1183 let code = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
1185 let plugin = CodeParserPlugin;
1186 let entities = plugin.extract_entities(code, "nested.go");
1187 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1188
1189 assert!(names.contains(&"outer"), "got: {:?}", names);
1190 }
1191
1192 #[test]
1193 fn test_renamed_function_same_structural_hash() {
1194 let code_a = "def get_card():\n return db.query('cards')\n";
1195 let code_b = "def get_card_1():\n return db.query('cards')\n";
1196
1197 let plugin = CodeParserPlugin;
1198 let entities_a = plugin.extract_entities(code_a, "a.py");
1199 let entities_b = plugin.extract_entities(code_b, "b.py");
1200
1201 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1202 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1203 assert_eq!(entities_a[0].name, "get_card");
1204 assert_eq!(entities_b[0].name, "get_card_1");
1205
1206 assert_eq!(
1208 entities_a[0].structural_hash, entities_b[0].structural_hash,
1209 "Renamed function with identical body should have same structural_hash"
1210 );
1211
1212 assert_ne!(
1214 entities_a[0].content_hash, entities_b[0].content_hash,
1215 "Content hash should differ since raw content includes the name"
1216 );
1217 }
1218
1219 #[test]
1220 fn test_swift_renamed_operator_same_structural_hash() {
1221 let plugin = CodeParserPlugin;
1222 let entities_a = plugin.extract_entities("prefix operator ~~~\n", "a.swift");
1223 let entities_b = plugin.extract_entities("prefix operator !!!\n", "b.swift");
1224
1225 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1226 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1227 assert_eq!(entities_a[0].name, "~~~");
1228 assert_eq!(entities_b[0].name, "!!!");
1229 assert_eq!(entities_a[0].entity_type, "operator");
1230 assert_eq!(entities_b[0].entity_type, "operator");
1231 assert_eq!(
1232 entities_a[0].structural_hash, entities_b[0].structural_hash,
1233 "Renamed operator with otherwise identical declaration should have same structural_hash"
1234 );
1235 assert_ne!(
1236 entities_a[0].content_hash, entities_b[0].content_hash,
1237 "Content hash should differ since raw content includes the operator token"
1238 );
1239 }
1240
1241 #[test]
1242 fn test_swift_synthesized_names_disambiguate_overloads() {
1243 let plugin = CodeParserPlugin;
1244 let code = r#"
1245struct Matrix {
1246 subscript(row: Int) -> Double {
1247 return Double(row)
1248 }
1249
1250 subscript(row: Int, column: Int) -> Double {
1251 return Double(row + column)
1252 }
1253}
1254
1255class Builder {
1256 init(value: Int) {}
1257 init(text: String) {}
1258}
1259"#;
1260
1261 let entities = plugin.extract_entities(code, "Overloads.swift");
1262
1263 let subscript_ids: Vec<&str> = entities
1264 .iter()
1265 .filter(|e| e.entity_type == "subscript")
1266 .map(|e| e.id.as_str())
1267 .collect();
1268 assert_eq!(subscript_ids.len(), 2);
1269 assert_ne!(subscript_ids[0], subscript_ids[1]);
1270 assert!(subscript_ids.iter().all(|id| id.contains("@L")));
1271
1272 let init_ids: Vec<&str> = entities
1273 .iter()
1274 .filter(|e| e.entity_type == "init")
1275 .map(|e| e.id.as_str())
1276 .collect();
1277 assert_eq!(init_ids.len(), 2);
1278 assert_ne!(init_ids[0], init_ids[1]);
1279 assert!(init_ids.iter().all(|id| id.contains("@L")));
1280 }
1281
1282 #[test]
1283 fn test_hcl_entity_extraction() {
1284 let code = r#"
1285region = "eu-west-1"
1286
1287variable "image_id" {
1288 type = string
1289}
1290
1291resource "aws_instance" "web" {
1292 ami = var.image_id
1293
1294 lifecycle {
1295 create_before_destroy = true
1296 }
1297}
1298"#;
1299 let plugin = CodeParserPlugin;
1300 let entities = plugin.extract_entities(code, "main.tf");
1301 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1302 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1303 eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1304
1305 assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
1306 assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
1307 assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
1308 assert!(
1309 names.contains(&"resource.aws_instance.web.lifecycle"),
1310 "Should find nested lifecycle block with qualified name, got: {:?}",
1311 names
1312 );
1313 assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
1314 assert!(
1315 !names.contains(&"create_before_destroy"),
1316 "Should skip nested attributes inside nested blocks, got: {:?}",
1317 names
1318 );
1319
1320 let lifecycle = entities
1321 .iter()
1322 .find(|e| e.name == "resource.aws_instance.web.lifecycle")
1323 .unwrap();
1324 assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
1325 assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
1326 }
1327
1328 #[test]
1329 fn test_kotlin_entity_extraction() {
1330 let code = r#"
1331class UserService {
1332 val name: String = ""
1333
1334 fun greet(): String {
1335 return "Hello, $name"
1336 }
1337
1338 companion object {
1339 fun create(): UserService = UserService()
1340 }
1341}
1342
1343interface Repository {
1344 fun findById(id: Int): Any?
1345}
1346
1347object AppConfig {
1348 val version = "1.0"
1349}
1350
1351fun topLevel(x: Int): Int = x * 2
1352"#;
1353 let plugin = CodeParserPlugin;
1354 let entities = plugin.extract_entities(code, "App.kt");
1355 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1356 eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1357 assert!(names.contains(&"UserService"), "got: {:?}", names);
1358 assert!(names.contains(&"greet"), "got: {:?}", names);
1359 assert!(names.contains(&"Repository"), "got: {:?}", names);
1360 assert!(names.contains(&"findById"), "got: {:?}", names);
1361 assert!(names.contains(&"AppConfig"), "got: {:?}", names);
1362 assert!(names.contains(&"topLevel"), "got: {:?}", names);
1363 }
1364
1365 #[test]
1366 fn test_xml_entity_extraction() {
1367 let code = r#"<?xml version="1.0" encoding="UTF-8"?>
1368<project>
1369 <groupId>com.example</groupId>
1370 <artifactId>my-app</artifactId>
1371 <dependencies>
1372 <dependency>
1373 <groupId>junit</groupId>
1374 <artifactId>junit</artifactId>
1375 </dependency>
1376 </dependencies>
1377 <build>
1378 <plugins>
1379 <plugin>
1380 <groupId>org.apache.maven</groupId>
1381 </plugin>
1382 </plugins>
1383 </build>
1384</project>
1385"#;
1386 let plugin = CodeParserPlugin;
1387 let entities = plugin.extract_entities(code, "pom.xml");
1388 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1389 eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1390 assert!(names.contains(&"project"), "got: {:?}", names);
1391 assert!(names.contains(&"dependencies"), "got: {:?}", names);
1392 assert!(names.contains(&"build"), "got: {:?}", names);
1393 }
1394
1395 #[test]
1396 fn test_arrow_callback_scope_boundary_typescript() {
1397 let code = r#"
1401const activeQueues = [
1402 { queue: queues.fooQueue, processor: foo.process },
1403];
1404
1405activeQueues.forEach((handler: any) => {
1406 const queue = handler.queue;
1407 let retries = 0;
1408
1409 class QueueHandler {
1410 handle() { return queue; }
1411 }
1412
1413 function createHandler() {
1414 return new QueueHandler();
1415 }
1416
1417 queue.process((job) => {
1418 const orderId = job.data.orderId;
1419 return orderId;
1420 });
1421});
1422
1423function handleFailure(job: any, err: any) {
1424 console.error('failed', err);
1425}
1426"#;
1427 let plugin = CodeParserPlugin;
1428 let entities = plugin.extract_entities(code, "process.ts");
1429 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1430 let top_level: Vec<&str> = entities
1431 .iter()
1432 .filter(|e| e.parent_id.is_none())
1433 .map(|e| e.name.as_str())
1434 .collect();
1435
1436 assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
1438 assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
1439
1440 assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
1442 assert!(names.contains(&"handle"), "got: {:?}", names);
1443 assert!(names.contains(&"createHandler"), "got: {:?}", names);
1444
1445 assert!(!names.contains(&"queue"), "got: {:?}", names);
1447 assert!(!names.contains(&"retries"), "got: {:?}", names);
1448 assert!(!names.contains(&"orderId"), "got: {:?}", names);
1449 }
1450
1451 #[test]
1452 fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
1453 let code = r#"
1454function factory() {
1455 class Foo {
1456 method(): number {
1457 return 1;
1458 }
1459 }
1460
1461 function bar(): Foo {
1462 return new Foo();
1463 }
1464}
1465
1466factory();
1467"#;
1468 let plugin = CodeParserPlugin;
1469 let entities = plugin.extract_entities(code, "wrapped.ts");
1470 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1471 assert!(
1472 names.contains(&"factory"),
1473 "Should find top-level wrapper function, got: {:?}",
1474 names
1475 );
1476 assert!(
1477 names.contains(&"Foo"),
1478 "Should find class inside top-level wrapper, got: {:?}",
1479 names
1480 );
1481 assert!(
1482 names.contains(&"bar"),
1483 "Should find function inside top-level wrapper, got: {:?}",
1484 names
1485 );
1486 }
1487
1488 #[test]
1489 fn test_top_level_iife_still_extracts_typescript_entities() {
1490 let code = r#"
1491(() => {
1492 class Foo {
1493 method(): number {
1494 return 1;
1495 }
1496 }
1497
1498 function bar(): Foo {
1499 return new Foo();
1500 }
1501})();
1502"#;
1503 let plugin = CodeParserPlugin;
1504 let entities = plugin.extract_entities(code, "iife.ts");
1505 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1506 assert!(
1507 names.contains(&"Foo"),
1508 "Should find class inside top-level IIFE, got: {:?}",
1509 names
1510 );
1511 assert!(
1512 names.contains(&"bar"),
1513 "Should find function inside top-level IIFE, got: {:?}",
1514 names
1515 );
1516 }
1517
1518 #[test]
1519 fn test_function_locals_not_extracted_as_nested_entities_typescript() {
1520 let code = r#"
1521export default function foo() {
1522 const x = 1;
1523 return x;
1524}
1525"#;
1526 let plugin = CodeParserPlugin;
1527 let entities = plugin.extract_entities(code, "default-export.ts");
1528 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1529 assert!(
1530 names.contains(&"foo"),
1531 "Should find exported function, got: {:?}",
1532 names
1533 );
1534 assert!(
1535 !names.contains(&"x"),
1536 "Local inside function should not be extracted as an entity, got: {:?}",
1537 names
1538 );
1539 }
1540
1541 #[test]
1542 fn test_function_expression_scope_boundary_typescript() {
1543 let code = r#"
1546const foo = function namedExpr(x: number) {
1547 const inner = x + 1;
1548 return inner;
1549};
1550
1551const bar = function(y: number) {
1552 const local = y * 2;
1553 return local;
1554};
1555
1556const items = [1, 2, 3];
1557
1558items.forEach(function process(item) {
1559 const doubled = item * 2;
1560 console.log(doubled);
1561});
1562"#;
1563 let plugin = CodeParserPlugin;
1564 let entities = plugin.extract_entities(code, "funexpr.ts");
1565 let top_level: Vec<&str> = entities
1566 .iter()
1567 .filter(|e| e.parent_id.is_none())
1568 .map(|e| e.name.as_str())
1569 .collect();
1570 let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
1571 let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1572
1573 assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
1576 assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
1577 assert!(top_level.contains(&"items"), "got: {:?}", top_level);
1578 assert_eq!(find("foo").entity_type, "function");
1579 assert_eq!(find("bar").entity_type, "function");
1580 assert_eq!(find("items").entity_type, "variable");
1581
1582 assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
1584 assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
1585 assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
1586
1587 assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
1589 }
1590
1591 #[test]
1592 fn test_variable_assigned_arrow_extracts_inner_entities() {
1593 let code = r#"
1596const handler = () => {
1597 class Inner {
1598 run() { return 1; }
1599 }
1600
1601 function make() {
1602 return new Inner();
1603 }
1604
1605 const local = 42;
1606};
1607"#;
1608 let plugin = CodeParserPlugin;
1609 let entities = plugin.extract_entities(code, "assigned.ts");
1610 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1611 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1612
1613 assert_eq!(handler.entity_type, "function");
1614 assert!(names.contains(&"handler"), "got: {:?}", names);
1615 assert!(names.contains(&"Inner"), "got: {:?}", names);
1616 assert!(names.contains(&"run"), "got: {:?}", names);
1617 assert!(names.contains(&"make"), "got: {:?}", names);
1618 assert!(!names.contains(&"local"), "got: {:?}", names);
1619 }
1620
1621 #[test]
1622 fn test_variable_assigned_function_expression_extracts_inner_entities() {
1623 let code = r#"
1625const handler = function() {
1626 class Inner {}
1627 function make() { return new Inner(); }
1628 const local = 42;
1629};
1630"#;
1631 let plugin = CodeParserPlugin;
1632 let entities = plugin.extract_entities(code, "funexpr-inner.ts");
1633 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1634 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1635
1636 assert_eq!(handler.entity_type, "function");
1637 assert!(names.contains(&"handler"), "got: {:?}", names);
1638 assert!(names.contains(&"Inner"), "got: {:?}", names);
1639 assert!(names.contains(&"make"), "got: {:?}", names);
1640 assert!(!names.contains(&"local"), "got: {:?}", names);
1641 }
1642
1643 #[test]
1644 fn test_let_assigned_arrow_stays_variable_typescript() {
1645 let code = r#"
1646let handler = () => {
1647 return 42;
1648};
1649"#;
1650 let plugin = CodeParserPlugin;
1651 let entities = plugin.extract_entities(code, "let-assigned.ts");
1652 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1653
1654 assert_eq!(handler.entity_type, "variable");
1655 }
1656
1657 #[test]
1658 fn test_const_assigned_arrow_promoted_to_function_javascript() {
1659 let code = r#"
1660const handler = () => {
1661 return 42;
1662};
1663"#;
1664 let plugin = CodeParserPlugin;
1665 let entities = plugin.extract_entities(code, "handler.js");
1666 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1667
1668 assert_eq!(handler.entity_type, "function");
1669 }
1670
1671 #[test]
1672 fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
1673 let code = r#"
1674const value = 1, handler = () => value;
1675const first = () => 1, second = 2;
1676"#;
1677 let plugin = CodeParserPlugin;
1678 let entities = plugin.extract_entities(code, "sample.ts");
1679 let find = |name: &str| {
1680 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1681 panic!(
1682 "missing {name}; got: {:?}",
1683 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
1684 )
1685 })
1686 };
1687
1688 assert_eq!(find("value").entity_type, "variable");
1689 assert_eq!(find("handler").entity_type, "function");
1690 assert_eq!(find("first").entity_type, "function");
1691 assert_eq!(find("second").entity_type, "variable");
1692 }
1693
1694 #[test]
1695 fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
1696 let code = r#"
1697function wrapper() {
1698 const holder = class {
1699 run() { return 1; }
1700 }, handler = () => {
1701 class Inner {
1702 go() { return 2; }
1703 }
1704 }, value = 1;
1705}
1706"#;
1707 let plugin = CodeParserPlugin;
1708 let entities = plugin.extract_entities(code, "sample.ts");
1709 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1710 let find = |name: &str| {
1711 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1712 panic!(
1713 "missing {name}; got: {:?}",
1714 entities
1715 .iter()
1716 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1717 .collect::<Vec<_>>()
1718 )
1719 })
1720 };
1721
1722 assert_eq!(find("wrapper").entity_type, "function");
1723 assert_eq!(find("handler").entity_type, "function");
1724 assert!(names.contains(&"run"), "got: {:?}", names);
1725 assert!(names.contains(&"Inner"), "got: {:?}", names);
1726 assert!(names.contains(&"go"), "got: {:?}", names);
1727 assert!(!names.contains(&"holder"), "got: {:?}", names);
1728 assert!(!names.contains(&"value"), "got: {:?}", names);
1729 }
1730
1731 #[test]
1732 fn test_go_var_declaration() {
1733 let code = r#"package featuremgmt
1734
1735type FeatureFlag struct {
1736 Name string
1737 Description string
1738 Stage string
1739}
1740
1741var standardFeatureFlags = []FeatureFlag{
1742 {
1743 Name: "panelTitleSearch",
1744 Description: "Search for dashboards using panel title",
1745 Stage: "PublicPreview",
1746 },
1747}
1748
1749func GetFlags() []FeatureFlag {
1750 return standardFeatureFlags
1751}
1752"#;
1753 let plugin = CodeParserPlugin;
1754 let entities = plugin.extract_entities(code, "flags.go");
1755 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1756 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1757 eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1758
1759 assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
1760 assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
1761 assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
1762 }
1763
1764 #[test]
1765 fn test_go_grouped_var_declaration() {
1766 let code = r#"package test
1767
1768var (
1769 simple = 42
1770 flags = []string{"a", "b"}
1771)
1772
1773const (
1774 x = 1
1775 y = 2
1776)
1777
1778func main() {}
1779"#;
1780 let plugin = CodeParserPlugin;
1781 let entities = plugin.extract_entities(code, "test.go");
1782 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1783 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1784 eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1785
1786 assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1787 assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1788 assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1789 }
1790
1791 #[test]
1792 fn test_dart_entity_extraction() {
1793 let code = r#"
1794import 'dart:math';
1795
1796class Calculator {
1797 final String name;
1798
1799 Calculator(this.name);
1800
1801 Calculator.withDefault() : name = 'default';
1802
1803 factory Calculator.create(String name) {
1804 return Calculator(name);
1805 }
1806
1807 int add(int a, int b) {
1808 return a + b;
1809 }
1810
1811 int get doubleAdd => add(1, 1) * 2;
1812
1813 set label(String value) {
1814 // no-op
1815 }
1816
1817 int operator +(Calculator other) {
1818 return 0;
1819 }
1820}
1821
1822mixin Loggable {
1823 void log(String message) {
1824 print(message);
1825 }
1826}
1827
1828extension StringExt on String {
1829 bool get isBlank => trim().isEmpty;
1830}
1831
1832enum Status {
1833 active,
1834 inactive;
1835
1836 String display() => name.toUpperCase();
1837}
1838
1839typedef Callback = void Function(int);
1840
1841int add(int a, int b) {
1842 return a + b;
1843}
1844
1845extension type Wrapper(int value) implements int {}
1846"#;
1847 let plugin = CodeParserPlugin;
1848 let entities = plugin.extract_entities(code, "calculator.dart");
1849 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1850 eprintln!(
1851 "Dart entities: {:?}",
1852 entities
1853 .iter()
1854 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1855 .collect::<Vec<_>>()
1856 );
1857
1858 assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
1860 assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
1861 assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
1862 assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
1863 assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
1864 assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
1865 assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
1866
1867 let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
1869 assert!(add_method.is_some(), "Should find add method inside Calculator");
1870 assert_eq!(add_method.unwrap().entity_type, "method");
1871
1872 let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
1874 assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
1875 let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
1876 assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
1877 assert_eq!(named_ctor.unwrap().entity_type, "constructor");
1878 assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
1879
1880 let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
1882 assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
1883 assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
1884
1885 let getter = entities.iter().find(|e| e.name == "doubleAdd");
1887 assert!(getter.is_some(), "Should find getter doubleAdd");
1888 assert_eq!(getter.unwrap().entity_type, "getter");
1889
1890 let setter = entities.iter().find(|e| e.name == "label");
1891 assert!(setter.is_some(), "Should find setter label");
1892 assert_eq!(setter.unwrap().entity_type, "setter");
1893
1894 let operator = entities.iter().find(|e| e.name == "operator +");
1895 assert!(operator.is_some(), "Should find operator +");
1896 assert_eq!(operator.unwrap().entity_type, "method");
1897
1898 let log_method = entities.iter().find(|e| e.name == "log");
1900 assert!(log_method.is_some(), "Should find log in Loggable");
1901 assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
1902
1903 let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
1905 assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
1906
1907 let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
1908 assert_eq!(loggable.entity_type, "mixin");
1909
1910 let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
1911 assert_eq!(ext.entity_type, "extension");
1912
1913 let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
1914 assert_eq!(wrapper.entity_type, "extension");
1915 }
1916
1917 #[test]
1918 fn test_dart_top_level_function_includes_body() {
1919 let code = r#"
1920int add(int a, int b) {
1921 return a + b;
1922}
1923
1924String greet(String name) => 'Hello, $name!';
1925"#;
1926 let plugin = CodeParserPlugin;
1927 let entities = plugin.extract_entities(code, "funcs.dart");
1928 eprintln!(
1929 "Dart top-level: {:?}",
1930 entities
1931 .iter()
1932 .map(|e| (&e.name, &e.entity_type, &e.content))
1933 .collect::<Vec<_>>()
1934 );
1935
1936 let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
1937 assert!(
1938 add_fn.content.contains("return a + b"),
1939 "Top-level function content should include the body, got: {:?}",
1940 add_fn.content
1941 );
1942
1943 let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
1944 assert!(
1945 greet_fn.content.contains("Hello"),
1946 "Expression body should be included, got: {:?}",
1947 greet_fn.content
1948 );
1949
1950 let code_v2 = r#"
1952int add(int a, int b) {
1953 return a * b;
1954}
1955
1956String greet(String name) => 'Hello, $name!';
1957"#;
1958 let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
1959 let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
1960 assert_ne!(
1961 add_fn.content_hash, add_v2.content_hash,
1962 "Body change should produce different content_hash"
1963 );
1964
1965 let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
1967 assert_eq!(
1968 greet_fn.content_hash, greet_v2.content_hash,
1969 "Unchanged function should keep the same content_hash"
1970 );
1971 }
1972
1973 #[test]
1974 fn test_dart_renamed_named_constructor_same_structural_hash() {
1975 let code_a = r#"
1976class Foo {
1977 Foo.fromJson(Map<String, dynamic> json) {
1978 print(json);
1979 }
1980}
1981"#;
1982 let code_b = r#"
1983class Foo {
1984 Foo.fromMap(Map<String, dynamic> json) {
1985 print(json);
1986 }
1987}
1988"#;
1989 let plugin = CodeParserPlugin;
1990 let entities_a = plugin.extract_entities(code_a, "a.dart");
1991 let entities_b = plugin.extract_entities(code_b, "b.dart");
1992
1993 let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
1994 let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
1995
1996 assert_eq!(
1997 ctor_a.structural_hash, ctor_b.structural_hash,
1998 "Renamed named constructor with identical body should have same structural_hash"
1999 );
2000 assert_ne!(
2001 ctor_a.content_hash, ctor_b.content_hash,
2002 "Content hash should differ since raw content includes the name"
2003 );
2004 }
2005
/// Top-level Dart accessors: the getter must be reported with entity type
/// `getter`, carry its body and have no parent; the setter is looked up with
/// entity type `function` and must carry its body as well.
#[test]
fn test_dart_top_level_getter_setter() {
    let code = r#"
int _value = 0;

int get currentValue {
  return _value;
}

set currentValue(int v) {
  _value = v;
}
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "accessors.dart");
    let dump: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.content))
        .collect();
    eprintln!("Dart top-level accessors: {:?}", dump);

    // Getter: must exist, include its body, and sit at the top level.
    let getter = extracted
        .iter()
        .find(|e| e.name == "currentValue" && e.entity_type == "getter")
        .unwrap_or_else(|| {
            panic!(
                "Should find top-level getter, got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert!(
        getter.content.contains("return _value"),
        "Top-level getter content should include the body"
    );
    assert!(getter.parent_id.is_none(), "Top-level getter should have no parent");

    // Setter: this test expects it to be reported with entity type "function".
    let setter = extracted
        .iter()
        .find(|e| e.name == "currentValue" && e.entity_type == "function")
        .unwrap_or_else(|| {
            panic!(
                "Should find top-level setter as function, got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert!(
        setter.content.contains("_value = v"),
        "Top-level setter content should include the body"
    );
}
2049
/// Dart class members `name` (final) and `maxRetries` (static const) must
/// both be reported with entity type `field`.
#[test]
fn test_dart_field_entity_type() {
    let code = r#"
class Config {
  final String name;
  static const int maxRetries = 3;
}
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "config.dart");
    let dump: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
        .collect();
    eprintln!("Dart fields: {:?}", dump);

    // `name` is only accepted when it is attached to a parent entity.
    let name_field = extracted
        .iter()
        .find(|e| e.name == "name" && e.parent_id.is_some())
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'name', got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert_eq!(name_field.entity_type, "field");

    let max_retries = extracted
        .iter()
        .find(|e| e.name == "maxRetries")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'maxRetries', got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert_eq!(max_retries.entity_type, "field");
}
2078
/// Abstract Dart field declarations: `x` (from the multi-name declaration
/// `x, y`) and `label` (single name) must both be reported as `field`, and
/// `x` must have a parent.
#[test]
fn test_dart_identifier_list_fields() {
    let code = r#"
abstract class Shape {
  abstract double x, y;
  abstract String label;
}
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "shape.dart");
    let dump: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
        .collect();
    eprintln!("Dart identifier_list fields: {:?}", dump);

    let x_field = extracted
        .iter()
        .find(|e| e.name == "x")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'x' from identifier_list, got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert_eq!(x_field.entity_type, "field");
    assert!(x_field.parent_id.is_some(), "field 'x' should be nested under Shape");

    let label_field = extracted
        .iter()
        .find(|e| e.name == "label")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'label' from single-element identifier_list, got: {:?}",
                extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
            )
        });
    assert_eq!(label_field.entity_type, "field");
}
2111
/// Extracts entities from an OCaml implementation file (`.ml`) and checks the
/// entity type reported for each top-level declaration kind in the fixture.
#[test]
fn test_ocaml_entity_extraction() {
    let code = r#"
type color = Red | Green | Blue

type point = {
  x : float;
  y : float;
}

exception Not_found of string

let greet name =
  Printf.printf "Hello, %s!\n" name

let add a b = a + b

let version = "1.0"

let color_to_string = function
  | Red -> "red"
  | Blue -> "blue"

let inc = fun x -> x + 1

module MyModule = struct
  let helper x = x * 2
end

module type Printable = sig
  val to_string : 'a -> string
end

external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"

class point_class x_init = object
  val mutable x = x_init
  method get_x = x
end

class type measurable = object
  method measure : float
end
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "example.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml entities: {:?}",
        extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    // (entity name, expected entity type), checked in this order.
    let expectations = [
        ("color", "type"),
        ("point", "type"),
        ("Not_found", "exception"),
        ("greet", "function"),
        ("add", "function"),
        ("version", "value"),
        ("color_to_string", "function"),
        ("inc", "function"),
        ("MyModule", "module"),
        ("Printable", "module_type"),
        ("caml_input", "external"),
        ("point_class", "class"),
        ("measurable", "class_type"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
2178
/// Nested OCaml modules: `x` and `Inner` must point at `Outer` as their
/// parent, and `y` must point at `Inner`.
#[test]
fn test_ocaml_nested_module_entities() {
    let code = r#"
module Outer = struct
  let x = 42

  module Inner = struct
    let y = 0
  end
end
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "nested.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml nested: {:?}",
        extracted
            .iter()
            .map(|e| (&e.name, &e.entity_type, &e.parent_id))
            .collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    let outer = lookup("Outer");
    let x = lookup("x");
    let inner = lookup("Inner");
    let y = lookup("y");

    // Entity kinds first...
    assert_eq!(outer.entity_type, "module");
    assert_eq!(x.entity_type, "value");
    assert_eq!(inner.entity_type, "module");
    assert_eq!(y.entity_type, "value");

    // ...then the parent chain.
    assert!(
        x.parent_id.as_ref().is_some_and(|p| p == &outer.id),
        "x should be nested under Outer"
    );
    assert!(
        inner.parent_id.as_ref().is_some_and(|p| p == &outer.id),
        "Inner should be nested under Outer"
    );
    assert!(
        y.parent_id.as_ref().is_some_and(|p| p == &inner.id),
        "y should be nested under Inner"
    );
}
2212
/// Extracts entities from an OCaml interface file (`.mli`) and checks the
/// entity type reported for the type, `val`, exception and module-type items.
#[test]
fn test_ocaml_interface_entity_extraction() {
    let code = r#"
type t

val create : string -> t
val to_string : t -> string

exception Invalid_input of string

module type Serializable = sig
  val serialize : t -> string
end
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "example.mli");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml interface entities: {:?}",
        extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    // (entity name, expected entity type), checked in this order.
    let expectations = [
        ("t", "type"),
        ("create", "val"),
        ("to_string", "val"),
        ("Invalid_input", "exception"),
        ("Serializable", "module_type"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
2241
/// Every binding of a `let rec ... and ...` group must be reported as its own
/// `function` entity.
#[test]
fn test_ocaml_mutual_recursion_let() {
    let code = r#"
let rec even n = (n = 0) || odd (n - 1)
and odd n = (n <> 0) && even (n - 1)

let rec ping x = pong (x - 1)
and pong x = if x <= 0 then 0 else ping (x - 1)
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "mutual.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml mutual let: {:?}",
        extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    for name in ["even", "odd", "ping", "pong"] {
        assert_eq!(lookup(name).entity_type, "function");
    }
}
2264
/// Both modules of a `module rec ... and ...` group must be reported as
/// `module` entities, and each inner binding must point at its own module as
/// parent.
#[test]
fn test_ocaml_mutual_recursion_module() {
    let code = r#"
module rec A : sig val x : int end = struct
  let x = B.y + 1
end
and B : sig val y : int end = struct
  let y = 0
end
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "mutual_mod.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml mutual module: {:?}",
        extracted
            .iter()
            .map(|e| (&e.name, &e.entity_type, &e.parent_id))
            .collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    let module_a = lookup("A");
    let module_b = lookup("B");
    assert_eq!(module_a.entity_type, "module");
    assert_eq!(module_b.entity_type, "module");

    let x = lookup("x");
    let y = lookup("y");
    assert!(
        x.parent_id.as_ref().is_some_and(|p| p == &module_a.id),
        "x should be nested under A"
    );
    assert!(
        y.parent_id.as_ref().is_some_and(|p| p == &module_b.id),
        "y should be nested under B"
    );
}
2293
/// Names bound through tuple and record patterns in a top-level `let`, as
/// well as a plain binding, must each be reported as a `value` entity.
#[test]
fn test_ocaml_destructured_let() {
    let code = r#"
let (a, b) = (1, 2)

let { x; y } = point

let simple = 42
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "destruct.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml destructured: {:?}",
        extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    for name in ["a", "b", "x", "y", "simple"] {
        assert_eq!(lookup(name).entity_type, "value");
    }
}
2317
/// Both classes of a `class ... and ...` group must be reported as `class`
/// entities.
#[test]
fn test_ocaml_mutual_recursion_class() {
    let code = r#"
class foo = object
  method x = 1
end
and bar = object
  method y = 2
end
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "classes.ml");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    eprintln!(
        "OCaml mutual class: {:?}",
        extracted.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
    );

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    for name in ["foo", "bar"] {
        assert_eq!(lookup(name).entity_type, "class");
    }
}
2339
/// Extracts entities from a Perl module (`.pm`): the package must be reported
/// as `package` and both subs as `function`.
#[test]
fn test_perl_entity_extraction() {
    let code = r#"package Foo::Bar;

use strict;
use warnings;

sub hello {
    my ($self, $name) = @_;
    print "Hello, $name!\n";
}

sub _private_helper {
    return 42;
}

1;
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "Foo/Bar.pm");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    // Presence checks come first so a missing entity reports the whole name list.
    for expected in ["Foo::Bar", "hello", "_private_helper"] {
        assert!(all_names.contains(&expected), "got: {:?}", all_names);
    }

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    // (entity name, expected entity type), checked in this order.
    let expectations = [
        ("Foo::Bar", "package"),
        ("hello", "function"),
        ("_private_helper", "function"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
2373
/// Extracts entities from a free-form Fortran file (`.f90`): checks the entity
/// type of the module, function, subroutine and program, and that `add` and
/// `greet` each have a parent.
#[test]
fn test_fortran_entity_extraction() {
    let code = r#"module math_utils
  implicit none
contains
  function add(a, b) result(c)
    integer, intent(in) :: a, b
    integer :: c
    c = a + b
  end function add

  subroutine greet()
    print *, "hello"
  end subroutine greet
end module math_utils

program main
  implicit none
  print *, "hello"
end program main
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "test.f90");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    // Presence checks come first so a missing entity reports the whole name list.
    for expected in ["math_utils", "add", "greet", "main"] {
        assert!(all_names.contains(&expected), "got: {:?}", all_names);
    }

    // Looks an entity up by name; panics with the full name list when absent.
    let lookup = |name: &str| {
        extracted
            .iter()
            .find(|e| e.name == name)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, all_names))
    };

    // (entity name, expected entity type), checked in this order.
    let expectations = [
        ("math_utils", "module"),
        ("add", "function"),
        ("greet", "subroutine"),
        ("main", "program"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }

    // Procedures declared after `contains` must be attached to some parent.
    for nested in ["add", "greet"] {
        assert!(lookup(nested).parent_id.is_some());
    }
}
2416
/// Extracts entities from a brace-style Scala file: the class, trait and two
/// of the class methods must be found, and `getUsers` must have a parent.
#[test]
fn test_scala_entity_extraction() {
    let code = r#"
package com.example

import scala.collection.mutable

class UserService(val name: String) {
  def getUsers(): List[User] = db.findAll()

  def createUser(user: User): Unit = db.save(user)

  private def validate(user: User): Boolean = true
}

object UserService {
  def apply(name: String): UserService = new UserService(name)

  val DefaultName: String = "default"
}

trait Repository[T] {
  def findById(id: String): Option[T]
  def findAll(): List[T]
}

case class User(id: String, name: String)

type UserId = String
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "UserService.scala");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let dump: Vec<_> = extracted.iter().map(|e| (&e.name, &e.entity_type)).collect();
    eprintln!("Scala entities: {:?}", dump);

    assert!(
        all_names.contains(&"UserService"),
        "Should find class UserService, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"Repository"),
        "Should find trait Repository, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"getUsers"),
        "Should find method getUsers, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"createUser"),
        "Should find method createUser, got: {:?}",
        all_names
    );

    // Methods declared inside the class body must be linked to a parent entity.
    let get_users = extracted
        .iter()
        .find(|entity| entity.name == "getUsers")
        .unwrap();
    assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
}
2461
/// Extracts entities from a Scala 3 file written in indentation-based syntax:
/// both enums, the object, the trait and the type alias must be found by name.
#[test]
fn test_scala3_entity_extraction() {
    let code = r#"
package com.example

enum Color:
  case Red, Green, Blue

enum Planet(mass: Double, radius: Double):
  case Mercury extends Planet(3.303e+23, 2.4397e6)
  case Venus extends Planet(4.869e+24, 6.0518e6)

object Main:
  def main(args: Array[String]): Unit =
    println("Hello, World!")

trait Greeter:
  def greet(name: String): String

given Greeter with
  def greet(name: String): String = s"Hello, $name!"

extension (s: String)
  def shout: String = s.toUpperCase + "!"

type Predicate[A] = A => Boolean
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "Main.scala");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let dump: Vec<_> = extracted.iter().map(|e| (&e.name, &e.entity_type)).collect();
    eprintln!("Scala 3 entities: {:?}", dump);

    assert!(
        all_names.contains(&"Color"),
        "Should find enum Color, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"Planet"),
        "Should find enum Planet, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"Main"),
        "Should find object Main, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"Greeter"),
        "Should find trait Greeter, got: {:?}",
        all_names
    );
    assert!(
        all_names.contains(&"Predicate"),
        "Should find type alias Predicate, got: {:?}",
        all_names
    );
}
2500
/// Extracts entities from a Zig file: the three functions and three container
/// constants must be found by name, and the entity type is checked for
/// `greet`, `add`, `Point`, `Color` and `Person`.
#[test]
fn test_zig_entity_extraction() {
    let code = r#"
const std = @import("std");

pub const Point = struct {
    x: i32,
    y: i32,
};

pub const Color = enum {
    red,
    green,
    blue,
};

const Person = struct {
    name: []const u8,
    age: u32,
};

pub fn greet(name: []const u8) void {
    std.debug.print("Hello, {s}!\n", .{name});
}

fn add(a: i32, b: i32) i32 {
    return a + b;
}

pub fn main() !void {
    greet("world");
}

test "basic addition" {
    const result = add(2, 3);
    _ = result;
}
"#;
    let parser = CodeParserPlugin;
    let extracted = parser.extract_entities(code, "main.zig");
    let all_names: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    // name -> entity type, used for the type assertions below.
    let kind_by_name: std::collections::HashMap<&str, &str> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();

    assert!(all_names.contains(&"greet"), "Should find greet, got: {:?}", all_names);
    assert!(all_names.contains(&"add"), "Should find add, got: {:?}", all_names);
    assert!(all_names.contains(&"main"), "Should find main, got: {:?}", all_names);
    assert!(all_names.contains(&"Point"), "Should find Point, got: {:?}", all_names);
    assert!(all_names.contains(&"Color"), "Should find Color, got: {:?}", all_names);
    assert!(all_names.contains(&"Person"), "Should find Person, got: {:?}", all_names);

    // (entity name, expected entity type), checked in this order.
    let expectations = [
        ("greet", "function"),
        ("add", "function"),
        ("Point", "struct"),
        ("Color", "enum"),
        ("Person", "struct"),
    ];
    for (name, kind) in expectations {
        assert_eq!(kind_by_name[name], kind);
    }
}
2560}