Skip to main content

sem_core/parser/plugins/code/
mod.rs

1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use crate::utils::hash::{content_hash, structural_hash};
10use languages::{get_all_code_extensions, get_language_config};
11use entity_extractor::extract_entities;
12
/// Parser plugin registered under the id `"code"`.
///
/// It is a stateless unit struct: the only state it relies on is the
/// thread-local tree-sitter parser cache declared in this module.
pub struct CodeParserPlugin;
14
15// Thread-local parser cache: one Parser per language per thread.
16// Avoids creating a new Parser for every file during parallel graph builds.
17thread_local! {
18    static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
19}
20
21fn language_config_for_content(
22    content: &str,
23    file_path: &str,
24) -> Option<&'static languages::LanguageConfig> {
25    let ext = std::path::Path::new(file_path)
26        .extension()
27        .and_then(|e| e.to_str())
28        .map(|e| format!(".{}", e.to_lowercase()))
29        .unwrap_or_default();
30
31    get_language_config(&ext).or_else(|| {
32        detect_ext_from_content(content).and_then(|shebang_ext| get_language_config(&shebang_ext))
33    })
34}
35
36fn parse_tree(
37    config: &'static languages::LanguageConfig,
38    content: &str,
39) -> Option<tree_sitter::Tree> {
40    let language = (config.get_language)()?;
41
42    PARSER_CACHE.with(|cache| {
43        let mut cache = cache.borrow_mut();
44        let parser = cache.entry(config.id).or_insert_with(|| {
45            let mut p = tree_sitter::Parser::new();
46            let _ = p.set_language(&language);
47            p
48        });
49
50        parser.parse(content.as_bytes(), None)
51    })
52}
53
54fn has_non_comment_content(node: tree_sitter::Node, source: &[u8]) -> bool {
55    let mut worklist = Vec::new();
56    let mut cursor = node.walk();
57    worklist.extend(node.children(&mut cursor));
58
59    while let Some(node) = worklist.pop() {
60        if is_comment_node(node.kind()) {
61            continue;
62        }
63
64        if node.child_count() == 0 {
65            let start = node.start_byte();
66            let end = node.end_byte();
67            if start < end
68                && end <= source.len()
69                && source[start..end].iter().any(|b| !b.is_ascii_whitespace())
70            {
71                return true;
72            }
73            continue;
74        }
75
76        let mut cursor = node.walk();
77        worklist.extend(node.children(&mut cursor));
78    }
79
80    false
81}
82
/// Returns `true` when `kind` is one of the node kinds this module treats as a
/// comment. The comparison is exact and case-sensitive.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];

    COMMENT_KINDS.iter().any(|known| *known == kind)
}
89
/// Extracts the interpreter text of a leading `#!` line.
///
/// Returns `None` unless `content` starts with `#!`. Otherwise returns the
/// rest of the first line without the `#!` prefix or the line terminator,
/// which is the empty string when nothing follows the prefix.
fn shebang_line(content: &str) -> Option<&str> {
    let after_marker = content.strip_prefix("#!")?;
    let first_line = after_marker.lines().next();
    Some(first_line.unwrap_or(""))
}
95
96impl SemanticParserPlugin for CodeParserPlugin {
97    fn id(&self) -> &str {
98        "code"
99    }
100
101    fn extensions(&self) -> &[&str] {
102        get_all_code_extensions()
103    }
104
105    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
106        self.extract_entities_with_tree(content, file_path).0
107    }
108
109    fn extract_entities_with_tree(
110        &self,
111        content: &str,
112        file_path: &str,
113    ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
114        let Some(config) = language_config_for_content(content, file_path) else {
115            return (Vec::new(), None);
116        };
117
118        let Some(tree) = parse_tree(config, content) else {
119            return (Vec::new(), None);
120        };
121
122        let entities = extract_entities(&tree, file_path, config, content);
123        (entities, Some(tree))
124    }
125
126    fn structural_hash_content(&self, content: &str, file_path: &str) -> Option<String> {
127        let config = language_config_for_content(content, file_path)?;
128        let tree = parse_tree(config, content)?;
129        let shebang = shebang_line(content);
130        if shebang.is_none() && !has_non_comment_content(tree.root_node(), content.as_bytes()) {
131            return Some(String::new());
132        }
133        let structural = structural_hash(tree.root_node(), content.as_bytes());
134        match shebang {
135            Some(shebang) => Some(content_hash(&format!("shebang:{shebang}\n{structural}"))),
136            None => Some(structural),
137        }
138    }
139}
140
141use crate::parser::registry::detect_ext_from_content;
142
143#[cfg(test)]
144mod tests {
145    use super::*;
146
147    #[test]
148    fn test_java_entity_extraction() {
149        let code = r#"
150package com.example;
151
152import java.util.List;
153
154public class UserService {
155    private String name;
156
157    public UserService(String name) {
158        this.name = name;
159    }
160
161    public List<User> getUsers() {
162        return db.findAll();
163    }
164
165    public void createUser(User user) {
166        db.save(user);
167    }
168}
169
170interface Repository<T> {
171    T findById(String id);
172    List<T> findAll();
173}
174
175enum Status {
176    ACTIVE,
177    INACTIVE,
178    DELETED
179}
180"#;
181        let plugin = CodeParserPlugin;
182        let entities = plugin.extract_entities(code, "UserService.java");
183        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
184        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
185        eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
186
187        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
188        assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
189        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
190    }
191
192    #[test]
193    fn test_java_nested_methods() {
194        let code = r#"
195public class Calculator {
196    public int add(int a, int b) {
197        return a + b;
198    }
199
200    public int subtract(int a, int b) {
201        return a - b;
202    }
203}
204"#;
205        let plugin = CodeParserPlugin;
206        let entities = plugin.extract_entities(code, "Calculator.java");
207        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
208        eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
209
210        assert!(names.contains(&"Calculator"), "Should find Calculator class");
211        assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
212        assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
213
214        // Methods should have Calculator as parent
215        let add = entities.iter().find(|e| e.name == "add").unwrap();
216        assert!(add.parent_id.is_some(), "add should have parent_id");
217    }
218
219    #[test]
220    fn test_c_entity_extraction() {
221        let code = r#"
222#include <stdio.h>
223
224struct Point {
225    int x;
226    int y;
227};
228
229enum Color {
230    RED,
231    GREEN,
232    BLUE
233};
234
235typedef struct {
236    char name[50];
237    int age;
238} Person;
239
240void greet(const char* name) {
241    printf("Hello, %s!\n", name);
242}
243
244int add(int a, int b) {
245    return a + b;
246}
247
248int main() {
249    greet("world");
250    return 0;
251}
252"#;
253        let plugin = CodeParserPlugin;
254        let entities = plugin.extract_entities(code, "main.c");
255        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
256        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
257        eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
258
259        assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
260        assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
261        assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
262        assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
263        assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
264    }
265
266    #[test]
267    fn test_c_function_locals_not_extracted() {
268        let code = r#"
269int global_count = 0;
270int helper(void);
271
272int main(void) {
273    int local = helper();
274    const char *message = "hello";
275    return local + global_count;
276}
277"#;
278        let plugin = CodeParserPlugin;
279        let entities = plugin.extract_entities(code, "main.c");
280        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
281
282        assert!(names.contains(&"global_count"), "got: {:?}", names);
283        assert!(names.contains(&"helper"), "got: {:?}", names);
284        assert!(names.contains(&"main"), "got: {:?}", names);
285        assert!(!names.contains(&"local"), "got: {:?}", names);
286        assert!(!names.contains(&"message"), "got: {:?}", names);
287    }
288
289    #[test]
290    fn test_cpp_entity_extraction() {
291        let code = "namespace math {\nclass Vector3 {\npublic:\n    float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
292        let plugin = CodeParserPlugin;
293        let entities = plugin.extract_entities(code, "main.cpp");
294        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
295        assert!(names.contains(&"math"), "got: {:?}", names);
296        assert!(names.contains(&"Vector3"), "got: {:?}", names);
297        assert!(names.contains(&"greet"), "got: {:?}", names);
298    }
299
300    #[test]
301    fn test_cpp_function_locals_not_extracted() {
302        let code = r#"
303int global_value = 1;
304int helper();
305
306int main() {
307    int local = helper();
308    auto lambda = []() {
309        int lambda_local = 3;
310        return lambda_local;
311    };
312    return local + lambda();
313}
314"#;
315        let plugin = CodeParserPlugin;
316        let entities = plugin.extract_entities(code, "main.cpp");
317        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
318
319        assert!(names.contains(&"global_value"), "got: {:?}", names);
320        assert!(names.contains(&"helper"), "got: {:?}", names);
321        assert!(names.contains(&"main"), "got: {:?}", names);
322        assert!(!names.contains(&"local"), "got: {:?}", names);
323        assert!(!names.contains(&"lambda"), "got: {:?}", names);
324        assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
325    }
326
327    #[test]
328    fn test_ruby_entity_extraction() {
329        let code = "module Auth\n  class User\n    def greet\n      \"hi\"\n    end\n  end\nend\ndef helper(x)\n  x * 2\nend\n";
330        let plugin = CodeParserPlugin;
331        let entities = plugin.extract_entities(code, "auth.rb");
332        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
333        assert!(names.contains(&"Auth"), "got: {:?}", names);
334        assert!(names.contains(&"User"), "got: {:?}", names);
335        assert!(names.contains(&"helper"), "got: {:?}", names);
336    }
337
338    #[test]
339    fn test_csharp_entity_extraction() {
340        let code = "namespace MyApp {\npublic class User {\n    public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
341        let plugin = CodeParserPlugin;
342        let entities = plugin.extract_entities(code, "Models.cs");
343        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
344        assert!(names.contains(&"MyApp"), "got: {:?}", names);
345        assert!(names.contains(&"User"), "got: {:?}", names);
346        assert!(names.contains(&"Role"), "got: {:?}", names);
347    }
348
349    #[test]
350    fn test_swift_entity_extraction() {
351        let code = r#"
352import Foundation
353
354typealias Handler = (Int) -> Void
355
356prefix operator ~~~
357
358class UserService {
359    var name: String
360
361    init(name: String) {
362        self.name = name
363    }
364
365    deinit {
366        print("freed")
367    }
368
369    func getUsers() -> [User] {
370        return db.findAll()
371    }
372}
373
374struct Point {
375    var x: Double
376    var y: Double
377
378    subscript(index: Int) -> Double {
379        return x + y + Double(index)
380    }
381}
382
383enum Status {
384    case active
385    case inactive
386    case deleted
387}
388
389protocol Repository {
390    associatedtype Canvas
391    func findById(id: String) -> Canvas?
392    func findAll() -> [Canvas]
393}
394
395func helper(x: Int) -> Int {
396    return x * 2
397}
398"#;
399        let plugin = CodeParserPlugin;
400        let entities = plugin.extract_entities(code, "UserService.swift");
401        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
402        eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
403
404        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
405        assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
406        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
407        assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
408        assert!(names.contains(&"Canvas"), "Should find associatedtype Canvas, got: {:?}", names);
409        assert!(names.contains(&"Handler"), "Should find typealias Handler, got: {:?}", names);
410        assert!(names.contains(&"~~~"), "Should find custom operator ~~~, got: {:?}", names);
411        assert!(names.contains(&"init"), "Should find initializer init, got: {:?}", names);
412        assert!(names.contains(&"deinit"), "Should find deinitializer deinit, got: {:?}", names);
413        assert!(names.contains(&"subscript"), "Should find subscript, got: {:?}", names);
414        assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
415
416        let handler = entities.iter().find(|e| e.name == "Handler").unwrap();
417        assert_eq!(handler.entity_type, "type");
418        assert!(handler.parent_id.is_none());
419
420        let operator = entities.iter().find(|e| e.name == "~~~").unwrap();
421        assert_eq!(operator.entity_type, "operator");
422        assert!(operator.parent_id.is_none());
423
424        let user_service = entities.iter().find(|e| e.name == "UserService").unwrap();
425        assert_eq!(user_service.entity_type, "class");
426
427        let initializer = entities.iter().find(|e| e.name == "init").unwrap();
428        assert_eq!(initializer.entity_type, "init");
429        assert_eq!(initializer.parent_id.as_deref(), Some(user_service.id.as_str()));
430        assert_eq!(initializer.id, "UserService.swift::class::UserService::init");
431
432        let deinitializer = entities.iter().find(|e| e.name == "deinit").unwrap();
433        assert_eq!(deinitializer.entity_type, "deinit");
434        assert_eq!(deinitializer.parent_id.as_deref(), Some(user_service.id.as_str()));
435        assert_eq!(
436            deinitializer.id,
437            "UserService.swift::class::UserService::deinit"
438        );
439
440        let point = entities.iter().find(|e| e.name == "Point").unwrap();
441        assert_eq!(point.entity_type, "struct");
442
443        let subscript = entities.iter().find(|e| e.name == "subscript").unwrap();
444        assert_eq!(subscript.entity_type, "subscript");
445        assert_eq!(subscript.parent_id.as_deref(), Some(point.id.as_str()));
446        assert_eq!(
447            subscript.id,
448            "UserService.swift::struct::Point::subscript"
449        );
450
451        let status = entities.iter().find(|e| e.name == "Status").unwrap();
452        assert_eq!(status.entity_type, "enum");
453
454        let repository = entities.iter().find(|e| e.name == "Repository").unwrap();
455        assert_eq!(repository.entity_type, "protocol");
456        assert_eq!(repository.id, "UserService.swift::protocol::Repository");
457
458        let canvas = entities.iter().find(|e| e.name == "Canvas").unwrap();
459        assert_eq!(canvas.entity_type, "associatedtype");
460        assert_eq!(canvas.parent_id.as_deref(), Some(repository.id.as_str()));
461        assert_eq!(
462            canvas.id,
463            "UserService.swift::protocol::Repository::Canvas"
464        );
465    }
466
467    #[test]
468    fn test_swift_multi_binding_property_extraction() {
469        let code = r#"
470struct Point {
471    var x, y: Int
472}
473"#;
474        let plugin = CodeParserPlugin;
475        let entities = plugin.extract_entities(code, "Point.swift");
476        let point = entities.iter().find(|e| e.name == "Point").unwrap();
477        let properties: Vec<_> = entities
478            .iter()
479            .filter(|e| e.entity_type == "property")
480            .collect();
481
482        assert_eq!(
483            properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
484            vec!["x", "y"]
485        );
486        assert!(properties
487            .iter()
488            .all(|property| property.parent_id.as_deref() == Some(point.id.as_str())));
489        assert_eq!(properties[0].content, "var x: Int");
490        assert_eq!(properties[1].content, "var y: Int");
491    }
492
493    #[test]
494    fn test_swift_multi_binding_property_content_is_per_binding() {
495        let typed_code = r#"
496struct Types {
497    var x: Int, y: String
498}
499"#;
500        let plugin = CodeParserPlugin;
501        let typed_entities = plugin.extract_entities(typed_code, "Types.swift");
502        let typed_properties: Vec<_> = typed_entities
503            .iter()
504            .filter(|e| e.entity_type == "property")
505            .collect();
506        assert_eq!(typed_properties[0].content, "var x: Int");
507        assert_eq!(typed_properties[1].content, "var y: String");
508
509        let mixed_code = r#"
510struct Mixed {
511    var x, y: Int, z: String
512}
513"#;
514        let mixed_entities = plugin.extract_entities(mixed_code, "Mixed.swift");
515        let mixed_properties: Vec<_> = mixed_entities
516            .iter()
517            .filter(|e| e.entity_type == "property")
518            .collect();
519        assert_eq!(mixed_properties[0].content, "var x: Int");
520        assert_eq!(mixed_properties[1].content, "var y: Int");
521        assert_eq!(mixed_properties[2].content, "var z: String");
522
523        let generic_code = r#"
524struct GenericTypes {
525    var lookup: Dictionary<String, Int>, count: Int
526}
527"#;
528        let generic_entities = plugin.extract_entities(generic_code, "GenericTypes.swift");
529        let generic_properties: Vec<_> = generic_entities
530            .iter()
531            .filter(|e| e.entity_type == "property")
532            .collect();
533        assert_eq!(
534            generic_properties[0].content,
535            "var lookup: Dictionary<String, Int>"
536        );
537        assert_eq!(generic_properties[1].content, "var count: Int");
538
539        let initializer_code = r#"
540struct Initializers {
541    var a = Foo(), b = Bar()
542}
543"#;
544        let initializer_entities = plugin.extract_entities(initializer_code, "Initializers.swift");
545        let initializer_properties: Vec<_> = initializer_entities
546            .iter()
547            .filter(|e| e.entity_type == "property")
548            .collect();
549        assert!(initializer_properties[0].content.contains("Foo()"));
550        assert!(!initializer_properties[0].content.contains("Bar()"));
551        assert!(initializer_properties[1].content.contains("Bar()"));
552        assert!(!initializer_properties[1].content.contains("Foo()"));
553
554        let constants_code = r#"
555struct Constants {
556    let first, second, third: Int
557}
558"#;
559        let constants_entities = plugin.extract_entities(constants_code, "Constants.swift");
560        let constants_properties: Vec<_> = constants_entities
561            .iter()
562            .filter(|e| e.entity_type == "property")
563            .collect();
564        assert_eq!(
565            constants_properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
566            vec!["first", "second", "third"]
567        );
568        assert_eq!(constants_properties[0].content, "let first: Int");
569        assert_eq!(constants_properties[1].content, "let second: Int");
570        assert_eq!(constants_properties[2].content, "let third: Int");
571
572        let semicolon_code = r#"
573struct Semicolons {
574    var left, right: Int; var next: Int
575}
576"#;
577        let semicolon_entities = plugin.extract_entities(semicolon_code, "Semicolons.swift");
578        let semicolon_properties: Vec<_> = semicolon_entities
579            .iter()
580            .filter(|e| e.entity_type == "property")
581            .collect();
582        assert_eq!(semicolon_properties[0].content, "var left: Int");
583        assert_eq!(semicolon_properties[1].content, "var right: Int");
584        assert_eq!(semicolon_properties[2].content, "var next: Int");
585    }
586
587    #[test]
588    fn test_swift_body_locals_not_extracted_as_properties() {
589        let code = r#"
590class Cache {
591    var stored: Int
592
593    var computed: Int {
594        let computedLocal = stored + 1
595        func computedNested() -> Int {
596            return computedLocal
597        }
598        return computedNested()
599    }
600
601    var explicit: Int {
602        get {
603            let getterLocal = stored
604            func getterNested() -> Int {
605                return getterLocal
606            }
607            return getterNested()
608        }
609    }
610
611    init(seed: Int) {
612        let initial = seed
613        self.stored = initial
614    }
615
616    func value() -> Int {
617        let doubled = stored * 2
618        var offset = doubled + 1
619        func nested() -> Int {
620            let insideNested = offset
621            return insideNested
622        }
623        return nested()
624    }
625
626    subscript(index: Int) -> Int {
627        let shifted = index + stored
628        func subscriptNested() -> Int {
629            return shifted
630        }
631        return subscriptNested()
632    }
633
634    deinit {
635        let closing = stored
636        _ = closing
637    }
638}
639"#;
640        let plugin = CodeParserPlugin;
641        let entities = plugin.extract_entities(code, "Cache.swift");
642        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
643
644        assert!(names.contains(&"Cache"), "got: {:?}", names);
645        assert!(names.contains(&"stored"), "got: {:?}", names);
646        assert!(names.contains(&"computed"), "got: {:?}", names);
647        assert!(names.contains(&"explicit"), "got: {:?}", names);
648        assert!(names.contains(&"init"), "got: {:?}", names);
649        assert!(names.contains(&"value"), "got: {:?}", names);
650        assert!(names.contains(&"computedNested"), "got: {:?}", names);
651        assert!(names.contains(&"getterNested"), "got: {:?}", names);
652        assert!(names.contains(&"nested"), "got: {:?}", names);
653        assert!(names.contains(&"subscriptNested"), "got: {:?}", names);
654        assert!(names.contains(&"subscript"), "got: {:?}", names);
655        assert!(names.contains(&"deinit"), "got: {:?}", names);
656        assert!(!names.contains(&"Int"), "got: {:?}", names);
657
658        for local in [
659            "computedLocal",
660            "getterLocal",
661            "initial",
662            "doubled",
663            "offset",
664            "insideNested",
665            "shifted",
666            "closing",
667        ] {
668            assert!(!names.contains(&local), "{local} should not be an entity. Got: {:?}", names);
669        }
670    }
671
672    #[test]
673    fn test_swift_suppressed_multi_binding_initializers_are_traversed() {
674        let code = r#"
675func outer() {
676    let a = { func innerA() -> Int { 1 } },
677        b = { func innerB() -> Int { 2 } }
678}
679"#;
680        let plugin = CodeParserPlugin;
681        let entities = plugin.extract_entities(code, "Locals.swift");
682        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
683
684        assert!(names.contains(&"outer"), "got: {:?}", names);
685        assert!(names.contains(&"innerA"), "got: {:?}", names);
686        assert!(names.contains(&"innerB"), "got: {:?}", names);
687        assert!(!names.contains(&"a"), "local binding should stay suppressed: {:?}", names);
688        assert!(!names.contains(&"b"), "local binding should stay suppressed: {:?}", names);
689    }
690
691    #[test]
692    fn test_swift_conditional_compilation_inside_struct() {
693        let code = r#"
694import ArgumentParser
695
696public struct TuistCommand: AsyncParsableCommand {
697    public init() {}
698
699    public static var configuration: CommandConfiguration {
700        let comment = "brace in string }"
701        let multiline = """
702        brace in multiline }
703        escaped \"""
704        """
705        /* brace in comment } */
706        CommandConfiguration(commandName: "tuist")
707    }
708
709    #if os(macOS)
710        public static var groupedSubcommands: [ParsableCommand.Type] {
711            [InstallCommand.self]
712        }
713    #else
714        public static var groupedSubcommands: [ParsableCommand.Type] {
715            []
716        }
717    #endif
718
719    public func run() async throws {}
720}
721"#;
722        let plugin = CodeParserPlugin;
723        let entities = plugin.extract_entities(code, "TuistCommand.swift");
724        eprintln!(
725            "Swift conditional entities: {:?}",
726            entities
727                .iter()
728                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
729                .collect::<Vec<_>>()
730        );
731
732        let command = entities
733            .iter()
734            .find(|e| e.name == "TuistCommand")
735            .expect("Should recover TuistCommand struct");
736        assert_eq!(command.entity_type, "struct");
737        assert!(command.parent_id.is_none());
738
739        let renamed_code = code.replace("TuistCommand", "RenamedCommand");
740        let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
741        let renamed_command = renamed_entities
742            .iter()
743            .find(|e| e.name == "RenamedCommand")
744            .expect("Should recover renamed command struct");
745        assert_eq!(command.structural_hash, renamed_command.structural_hash);
746
747        for member in ["init", "configuration", "run"] {
748            let entity = entities
749                .iter()
750                .find(|e| e.name == member)
751                .unwrap_or_else(|| panic!("Should find {member}"));
752            assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
753        }
754
755        let grouped_subcommands: Vec<_> = entities
756            .iter()
757            .filter(|e| e.name == "groupedSubcommands")
758            .collect();
759        assert_eq!(grouped_subcommands.len(), 2);
760        assert!(grouped_subcommands
761            .iter()
762            .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
763    }
764
765    #[test]
766    fn test_swift_conditional_compilation_with_interpolated_brace_string() {
767        let plugin = CodeParserPlugin;
768        for (container_name, code) in [
769            (
770                "Config",
771                r#"
772class Config {
773    let tpl = "prefix \("}") suffix"
774#if DEBUG
775    func dump() { print(tpl) }
776#endif
777    func render() -> String { return tpl }
778}
779
780struct Tail { let q: Int }
781"#,
782            ),
783            (
784                "RawConfig",
785                r##"
786class RawConfig {
787    let tpl = #"prefix \#("{") suffix"#
788#if DEBUG
789    func dump() { print(tpl) }
790#endif
791    func render() -> String { return tpl }
792}
793"##,
794            ),
795            (
796                "MultilineConfig",
797                r#"
798class MultilineConfig {
799    let tpl = """
800    prefix \("}") suffix
801    """
802#if DEBUG
803    func dump() { print(tpl) }
804#endif
805    func render() -> String { return tpl }
806}
807"#,
808            ),
809            (
810                "ClosureConfig",
811                r#"
812class ClosureConfig {
813    let tpl = "prefix \(["}"].map { $0 }.joined()) suffix"
814#if DEBUG
815    func dump() { print(tpl) }
816#endif
817    func render() -> String { return tpl }
818}
819"#,
820            ),
821        ] {
822            let file_path = format!("{container_name}.swift");
823            let entities = plugin.extract_entities(code, &file_path);
824            let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
825            let container = entities
826                .iter()
827                .find(|e| e.name == container_name)
828                .unwrap_or_else(|| {
829                    panic!("Should recover {container_name}, got: {names:?}");
830                });
831            assert_eq!(container.entity_type, "class");
832            assert!(container.parent_id.is_none());
833
834            for member in ["tpl", "dump", "render"] {
835                let entity = entities
836                    .iter()
837                    .find(|e| e.name == member)
838                    .unwrap_or_else(|| {
839                        panic!("Should find {member} in {container_name}, got: {names:?}");
840                    });
841                assert_eq!(entity.parent_id.as_deref(), Some(container.id.as_str()));
842            }
843        }
844    }
845
846    #[test]
847    fn test_elixir_entity_extraction() {
848        let code = r#"
849defmodule MyApp.Accounts do
850  def create_user(attrs) do
851    %User{}
852    |> User.changeset(attrs)
853    |> Repo.insert()
854  end
855
856  defp validate(attrs) do
857    # private helper
858    :ok
859  end
860
861  defmacro is_admin(user) do
862    quote do
863      unquote(user).role == :admin
864    end
865  end
866
867  defguard is_positive(x) when is_integer(x) and x > 0
868end
869
870defprotocol Printable do
871  def to_string(data)
872end
873
874defimpl Printable, for: Integer do
875  def to_string(i), do: Integer.to_string(i)
876end
877"#;
878        let plugin = CodeParserPlugin;
879        let entities = plugin.extract_entities(code, "accounts.ex");
880        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
881        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
882        eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
883
884        assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
885        assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
886        assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
887        assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
888        assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
889
890        // Verify nesting: create_user should have MyApp.Accounts as parent
891        let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
892        assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
893    }
894
895    #[test]
896    fn test_bash_entity_extraction() {
897        let code = r#"#!/bin/bash
898
899greet() {
900    echo "Hello, $1!"
901}
902
903function deploy {
904    echo "deploying..."
905}
906
907# not a function
908echo "main script"
909"#;
910        let plugin = CodeParserPlugin;
911        let entities = plugin.extract_entities(code, "deploy.sh");
912        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
913        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
914        eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
915
916        assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
917        assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
918        assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
919    }
920
921    #[test]
922    fn test_typescript_entity_extraction() {
923        // Existing language should still work
924        let code = r#"
925export function hello(): string {
926    return "hello";
927}
928
929export class Greeter {
930    greet(name: string): string {
931        return `Hello, ${name}!`;
932    }
933}
934"#;
935        let plugin = CodeParserPlugin;
936        let entities = plugin.extract_entities(code, "test.ts");
937        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
938        assert!(names.contains(&"hello"), "Should find hello function");
939        assert!(names.contains(&"Greeter"), "Should find Greeter class");
940    }
941
942    #[test]
943    fn test_same_line_typescript_overload_ids_are_unique() {
944        let code = "function f(a: number): void {}; function f(a: string): void {}\n";
945        let plugin = CodeParserPlugin;
946        let entities = plugin.extract_entities(code, "over.ts");
947        let overloads: Vec<&SemanticEntity> = entities
948            .iter()
949            .filter(|entity| entity.name == "f" && entity.entity_type == "function")
950            .collect();
951        let ids: Vec<&str> = overloads.iter().map(|entity| entity.id.as_str()).collect();
952
953        assert_eq!(overloads.len(), 2, "expected both overloads, got: {entities:?}");
954        assert_eq!(ids, vec!["over.ts::function::f@L1#1", "over.ts::function::f@L1#2"]);
955    }
956
957    #[test]
958    fn test_same_line_duplicate_parent_ids_are_propagated_to_children() {
959        let code = "class C { m(){ return 1 } } class C { m(){ return 2 } }\n";
960        let plugin = CodeParserPlugin;
961        let entities = plugin.extract_entities(code, "c.ts");
962        let classes: Vec<&SemanticEntity> = entities
963            .iter()
964            .filter(|entity| entity.name == "C" && entity.entity_type == "class")
965            .collect();
966        let methods: Vec<&SemanticEntity> = entities
967            .iter()
968            .filter(|entity| entity.name == "m" && entity.entity_type == "method")
969            .collect();
970
971        assert_eq!(classes.len(), 2, "expected both classes, got: {entities:?}");
972        assert_eq!(methods.len(), 2, "expected both methods, got: {entities:?}");
973        assert_eq!(classes[0].id, "c.ts::class::C@L1#1");
974        assert_eq!(classes[1].id, "c.ts::class::C@L1#2");
975        assert_eq!(methods[0].parent_id.as_deref(), Some("c.ts::class::C@L1#1"));
976        assert_eq!(methods[1].parent_id.as_deref(), Some("c.ts::class::C@L1#2"));
977        assert_eq!(methods[0].id, "c.ts::class::C@L1#1::m");
978        assert_eq!(methods[1].id, "c.ts::class::C@L1#2::m");
979    }
980
981    #[test]
982    fn test_module_typescript_entity_extraction() {
983        let code = r#"
984export function hello(): string {
985    return "hello";
986}
987"#;
988        let plugin = CodeParserPlugin;
989        let entities = plugin.extract_entities(code, "test.mts");
990        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
991
992        assert!(names.contains(&"hello"), "Should find hello function");
993    }
994
995    #[test]
996    fn test_commonjs_typescript_entity_extraction() {
997        let code = r#"
998export class Greeter {
999    greet(name: string): string {
1000        return `Hello, ${name}!`;
1001    }
1002}
1003"#;
1004        let plugin = CodeParserPlugin;
1005        let entities = plugin.extract_entities(code, "test.cts");
1006        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1007
1008        assert!(names.contains(&"Greeter"), "Should find Greeter class");
1009        assert!(names.contains(&"greet"), "Should find greet method");
1010    }
1011
1012    #[test]
1013    fn test_typescript_generator_function_entity_extraction() {
1014        let code = r#"
1015export async function* streamUsers(): AsyncGenerator<string> {
1016    yield "alice";
1017}
1018"#;
1019        let plugin = CodeParserPlugin;
1020        let entities = plugin.extract_entities(code, "stream.ts");
1021        let stream = entities.iter().find(|e| e.name == "streamUsers");
1022
1023        assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1024        assert_eq!(stream.unwrap().entity_type, "function");
1025    }
1026
1027    #[test]
1028    fn test_javascript_generator_function_entity_extraction() {
1029        let code = r#"
1030export function* ids() {
1031    yield 1;
1032    yield 2;
1033}
1034"#;
1035        let plugin = CodeParserPlugin;
1036        let entities = plugin.extract_entities(code, "ids.js");
1037        let ids = entities.iter().find(|e| e.name == "ids");
1038
1039        assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1040        assert_eq!(ids.unwrap().entity_type, "function");
1041    }
1042
1043    #[test]
1044    fn test_nested_functions_typescript() {
1045        let code = r#"
1046function outer() {
1047    function inner() {
1048        return 42;
1049    }
1050    return inner();
1051}
1052"#;
1053        let plugin = CodeParserPlugin;
1054        let entities = plugin.extract_entities(code, "nested.ts");
1055        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1056        eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1057
1058        assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
1059        assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
1060
1061        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1062        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1063    }
1064
1065    #[test]
1066    fn test_typescript_nested_anonymous_class_fields() {
1067        let code = r#"
1068class L1 {
1069  L2 = class {
1070    L3 = class {
1071      L4 = class {
1072        method() { return 1; }
1073      };
1074    };
1075  };
1076}
1077"#;
1078        let plugin = CodeParserPlugin;
1079        let entities = plugin.extract_entities(code, "a.ts");
1080        let find = |name: &str| {
1081            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1082                panic!(
1083                    "missing {name}; got: {:?}",
1084                    entities
1085                        .iter()
1086                        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1087                        .collect::<Vec<_>>()
1088                )
1089            })
1090        };
1091
1092        let l1 = find("L1");
1093        assert_eq!(l1.entity_type, "class");
1094        let l1_id = l1.id.clone();
1095
1096        let l2 = find("L2");
1097        assert_eq!(l2.entity_type, "field");
1098        assert_eq!(l2.parent_id.as_deref(), Some(l1_id.as_str()));
1099        let l2_id = l2.id.clone();
1100
1101        let l3 = find("L3");
1102        assert_eq!(l3.entity_type, "field");
1103        assert_eq!(l3.parent_id.as_deref(), Some(l2_id.as_str()));
1104        let l3_id = l3.id.clone();
1105
1106        let l4 = find("L4");
1107        assert_eq!(l4.entity_type, "field");
1108        assert_eq!(l4.parent_id.as_deref(), Some(l3_id.as_str()));
1109        let l4_id = l4.id.clone();
1110
1111        let method = find("method");
1112        assert_eq!(method.entity_type, "method");
1113        assert_eq!(method.parent_id.as_deref(), Some(l4_id.as_str()));
1114        assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
1115    }
1116
1117    #[test]
1118    fn test_nested_functions_python() {
1119        let code = "def outer():\n    def inner():\n        return 42\n    return inner()\n";
1120        let plugin = CodeParserPlugin;
1121        let entities = plugin.extract_entities(code, "nested.py");
1122        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1123
1124        assert!(names.contains(&"outer"), "got: {:?}", names);
1125        assert!(names.contains(&"inner"), "got: {:?}", names);
1126
1127        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1128        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1129    }
1130
1131    #[test]
1132    fn test_nested_functions_rust() {
1133        let code = "fn outer() {\n    fn inner() -> i32 {\n        42\n    }\n    inner();\n}\n";
1134        let plugin = CodeParserPlugin;
1135        let entities = plugin.extract_entities(code, "nested.rs");
1136        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1137
1138        assert!(names.contains(&"outer"), "got: {:?}", names);
1139        assert!(names.contains(&"inner"), "got: {:?}", names);
1140
1141        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1142        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1143    }
1144
1145    #[test]
1146    fn test_rust_impl_blocks_unique_names() {
1147        let code = r#"
1148trait Greeting {
1149    fn greet(&self) -> String;
1150}
1151
1152struct Person;
1153struct Robot;
1154struct Cat;
1155
1156impl Greeting for Person {
1157    fn greet(&self) -> String { "Hello".to_string() }
1158}
1159
1160impl Greeting for Robot {
1161    fn greet(&self) -> String { "Beep".to_string() }
1162}
1163
1164impl Greeting for Cat {
1165    fn greet(&self) -> String { "Meow".to_string() }
1166}
1167"#;
1168        let plugin = CodeParserPlugin;
1169        let entities = plugin.extract_entities(code, "impls.rs");
1170        let impl_entities: Vec<&_> = entities.iter()
1171            .filter(|e| e.entity_type == "impl")
1172            .collect();
1173        let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
1174
1175        assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
1176        assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
1177        assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
1178        assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
1179    }
1180
1181    #[test]
1182    fn test_nested_functions_go() {
1183        // Go doesn't have named nested functions, but has nested type/var declarations
1184        let code = "package main\n\nfunc outer() {\n    var x int = 42\n    _ = x\n}\n";
1185        let plugin = CodeParserPlugin;
1186        let entities = plugin.extract_entities(code, "nested.go");
1187        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1188
1189        assert!(names.contains(&"outer"), "got: {:?}", names);
1190    }
1191
1192    #[test]
1193    fn test_renamed_function_same_structural_hash() {
1194        let code_a = "def get_card():\n    return db.query('cards')\n";
1195        let code_b = "def get_card_1():\n    return db.query('cards')\n";
1196
1197        let plugin = CodeParserPlugin;
1198        let entities_a = plugin.extract_entities(code_a, "a.py");
1199        let entities_b = plugin.extract_entities(code_b, "b.py");
1200
1201        assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1202        assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1203        assert_eq!(entities_a[0].name, "get_card");
1204        assert_eq!(entities_b[0].name, "get_card_1");
1205
1206        // Structural hash should match since only the name differs
1207        assert_eq!(
1208            entities_a[0].structural_hash, entities_b[0].structural_hash,
1209            "Renamed function with identical body should have same structural_hash"
1210        );
1211
1212        // Content hash should differ (it includes the name)
1213        assert_ne!(
1214            entities_a[0].content_hash, entities_b[0].content_hash,
1215            "Content hash should differ since raw content includes the name"
1216        );
1217    }
1218
1219    #[test]
1220    fn test_swift_renamed_operator_same_structural_hash() {
1221        let plugin = CodeParserPlugin;
1222        let entities_a = plugin.extract_entities("prefix operator ~~~\n", "a.swift");
1223        let entities_b = plugin.extract_entities("prefix operator !!!\n", "b.swift");
1224
1225        assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1226        assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1227        assert_eq!(entities_a[0].name, "~~~");
1228        assert_eq!(entities_b[0].name, "!!!");
1229        assert_eq!(entities_a[0].entity_type, "operator");
1230        assert_eq!(entities_b[0].entity_type, "operator");
1231        assert_eq!(
1232            entities_a[0].structural_hash, entities_b[0].structural_hash,
1233            "Renamed operator with otherwise identical declaration should have same structural_hash"
1234        );
1235        assert_ne!(
1236            entities_a[0].content_hash, entities_b[0].content_hash,
1237            "Content hash should differ since raw content includes the operator token"
1238        );
1239    }
1240
1241    #[test]
1242    fn test_swift_synthesized_names_disambiguate_overloads() {
1243        let plugin = CodeParserPlugin;
1244        let code = r#"
1245struct Matrix {
1246    subscript(row: Int) -> Double {
1247        return Double(row)
1248    }
1249
1250    subscript(row: Int, column: Int) -> Double {
1251        return Double(row + column)
1252    }
1253}
1254
1255class Builder {
1256    init(value: Int) {}
1257    init(text: String) {}
1258}
1259"#;
1260
1261        let entities = plugin.extract_entities(code, "Overloads.swift");
1262
1263        let subscript_ids: Vec<&str> = entities
1264            .iter()
1265            .filter(|e| e.entity_type == "subscript")
1266            .map(|e| e.id.as_str())
1267            .collect();
1268        assert_eq!(subscript_ids.len(), 2);
1269        assert_ne!(subscript_ids[0], subscript_ids[1]);
1270        assert!(subscript_ids.iter().all(|id| id.contains("@L")));
1271
1272        let init_ids: Vec<&str> = entities
1273            .iter()
1274            .filter(|e| e.entity_type == "init")
1275            .map(|e| e.id.as_str())
1276            .collect();
1277        assert_eq!(init_ids.len(), 2);
1278        assert_ne!(init_ids[0], init_ids[1]);
1279        assert!(init_ids.iter().all(|id| id.contains("@L")));
1280    }
1281
1282    #[test]
1283    fn test_hcl_entity_extraction() {
1284        let code = r#"
1285region = "eu-west-1"
1286
1287variable "image_id" {
1288  type = string
1289}
1290
1291resource "aws_instance" "web" {
1292  ami = var.image_id
1293
1294  lifecycle {
1295    create_before_destroy = true
1296  }
1297}
1298"#;
1299        let plugin = CodeParserPlugin;
1300        let entities = plugin.extract_entities(code, "main.tf");
1301        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1302        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1303        eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1304
1305        assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
1306        assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
1307        assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
1308        assert!(
1309            names.contains(&"resource.aws_instance.web.lifecycle"),
1310            "Should find nested lifecycle block with qualified name, got: {:?}",
1311            names
1312        );
1313        assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
1314        assert!(
1315            !names.contains(&"create_before_destroy"),
1316            "Should skip nested attributes inside nested blocks, got: {:?}",
1317            names
1318        );
1319
1320        let lifecycle = entities
1321            .iter()
1322            .find(|e| e.name == "resource.aws_instance.web.lifecycle")
1323            .unwrap();
1324        assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
1325        assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
1326    }
1327
1328    #[test]
1329    fn test_kotlin_entity_extraction() {
1330        let code = r#"
1331class UserService {
1332    val name: String = ""
1333
1334    fun greet(): String {
1335        return "Hello, $name"
1336    }
1337
1338    companion object {
1339        fun create(): UserService = UserService()
1340    }
1341}
1342
1343interface Repository {
1344    fun findById(id: Int): Any?
1345}
1346
1347object AppConfig {
1348    val version = "1.0"
1349}
1350
1351fun topLevel(x: Int): Int = x * 2
1352"#;
1353        let plugin = CodeParserPlugin;
1354        let entities = plugin.extract_entities(code, "App.kt");
1355        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1356        eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1357        assert!(names.contains(&"UserService"), "got: {:?}", names);
1358        assert!(names.contains(&"greet"), "got: {:?}", names);
1359        assert!(names.contains(&"Repository"), "got: {:?}", names);
1360        assert!(names.contains(&"findById"), "got: {:?}", names);
1361        assert!(names.contains(&"AppConfig"), "got: {:?}", names);
1362        assert!(names.contains(&"topLevel"), "got: {:?}", names);
1363    }
1364
1365    #[test]
1366    fn test_xml_entity_extraction() {
1367        let code = r#"<?xml version="1.0" encoding="UTF-8"?>
1368<project>
1369    <groupId>com.example</groupId>
1370    <artifactId>my-app</artifactId>
1371    <dependencies>
1372        <dependency>
1373            <groupId>junit</groupId>
1374            <artifactId>junit</artifactId>
1375        </dependency>
1376    </dependencies>
1377    <build>
1378        <plugins>
1379            <plugin>
1380                <groupId>org.apache.maven</groupId>
1381            </plugin>
1382        </plugins>
1383    </build>
1384</project>
1385"#;
1386        let plugin = CodeParserPlugin;
1387        let entities = plugin.extract_entities(code, "pom.xml");
1388        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1389        eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1390        assert!(names.contains(&"project"), "got: {:?}", names);
1391        assert!(names.contains(&"dependencies"), "got: {:?}", names);
1392        assert!(names.contains(&"build"), "got: {:?}", names);
1393    }
1394
1395    #[test]
1396    fn test_arrow_callback_scope_boundary_typescript() {
1397        // Arrow function callbacks: locals are suppressed, but inner
1398        // class/function declarations are still extracted. Nested callbacks
1399        // also suppress their locals.
1400        let code = r#"
1401const activeQueues = [
1402  { queue: queues.fooQueue, processor: foo.process },
1403];
1404
1405activeQueues.forEach((handler: any) => {
1406  const queue = handler.queue;
1407  let retries = 0;
1408
1409  class QueueHandler {
1410    handle() { return queue; }
1411  }
1412
1413  function createHandler() {
1414    return new QueueHandler();
1415  }
1416
1417  queue.process((job) => {
1418    const orderId = job.data.orderId;
1419    return orderId;
1420  });
1421});
1422
1423function handleFailure(job: any, err: any) {
1424  console.error('failed', err);
1425}
1426"#;
1427        let plugin = CodeParserPlugin;
1428        let entities = plugin.extract_entities(code, "process.ts");
1429        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1430        let top_level: Vec<&str> = entities
1431            .iter()
1432            .filter(|e| e.parent_id.is_none())
1433            .map(|e| e.name.as_str())
1434            .collect();
1435
1436        // Top-level entities preserved
1437        assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
1438        assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
1439
1440        // Declarations inside callback extracted
1441        assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
1442        assert!(names.contains(&"handle"), "got: {:?}", names);
1443        assert!(names.contains(&"createHandler"), "got: {:?}", names);
1444
1445        // Locals inside callbacks suppressed
1446        assert!(!names.contains(&"queue"), "got: {:?}", names);
1447        assert!(!names.contains(&"retries"), "got: {:?}", names);
1448        assert!(!names.contains(&"orderId"), "got: {:?}", names);
1449    }
1450
1451    #[test]
1452    fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
1453        let code = r#"
1454function factory() {
1455  class Foo {
1456    method(): number {
1457      return 1;
1458    }
1459  }
1460
1461  function bar(): Foo {
1462    return new Foo();
1463  }
1464}
1465
1466factory();
1467"#;
1468        let plugin = CodeParserPlugin;
1469        let entities = plugin.extract_entities(code, "wrapped.ts");
1470        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1471        assert!(
1472            names.contains(&"factory"),
1473            "Should find top-level wrapper function, got: {:?}",
1474            names
1475        );
1476        assert!(
1477            names.contains(&"Foo"),
1478            "Should find class inside top-level wrapper, got: {:?}",
1479            names
1480        );
1481        assert!(
1482            names.contains(&"bar"),
1483            "Should find function inside top-level wrapper, got: {:?}",
1484            names
1485        );
1486    }
1487
1488    #[test]
1489    fn test_top_level_iife_still_extracts_typescript_entities() {
1490        let code = r#"
1491(() => {
1492  class Foo {
1493    method(): number {
1494      return 1;
1495    }
1496  }
1497
1498  function bar(): Foo {
1499    return new Foo();
1500  }
1501})();
1502"#;
1503        let plugin = CodeParserPlugin;
1504        let entities = plugin.extract_entities(code, "iife.ts");
1505        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1506        assert!(
1507            names.contains(&"Foo"),
1508            "Should find class inside top-level IIFE, got: {:?}",
1509            names
1510        );
1511        assert!(
1512            names.contains(&"bar"),
1513            "Should find function inside top-level IIFE, got: {:?}",
1514            names
1515        );
1516    }
1517
1518    #[test]
1519    fn test_function_locals_not_extracted_as_nested_entities_typescript() {
1520        let code = r#"
1521export default function foo() {
1522  const x = 1;
1523  return x;
1524}
1525"#;
1526        let plugin = CodeParserPlugin;
1527        let entities = plugin.extract_entities(code, "default-export.ts");
1528        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1529        assert!(
1530            names.contains(&"foo"),
1531            "Should find exported function, got: {:?}",
1532            names
1533        );
1534        assert!(
1535            !names.contains(&"x"),
1536            "Local inside function should not be extracted as an entity, got: {:?}",
1537            names
1538        );
1539    }
1540
1541    #[test]
1542    fn test_function_expression_scope_boundary_typescript() {
1543        // Function expressions: assigned to variables, or used as callback
1544        // arguments. Locals are suppressed in all cases.
1545        let code = r#"
1546const foo = function namedExpr(x: number) {
1547  const inner = x + 1;
1548  return inner;
1549};
1550
1551const bar = function(y: number) {
1552  const local = y * 2;
1553  return local;
1554};
1555
1556const items = [1, 2, 3];
1557
1558items.forEach(function process(item) {
1559  const doubled = item * 2;
1560  console.log(doubled);
1561});
1562"#;
1563        let plugin = CodeParserPlugin;
1564        let entities = plugin.extract_entities(code, "funexpr.ts");
1565        let top_level: Vec<&str> = entities
1566            .iter()
1567            .filter(|e| e.parent_id.is_none())
1568            .map(|e| e.name.as_str())
1569            .collect();
1570        let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
1571        let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1572
1573        // Top-level declarations preserved, and const-assigned function
1574        // expressions are promoted from variable to function.
1575        assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
1576        assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
1577        assert!(top_level.contains(&"items"), "got: {:?}", top_level);
1578        assert_eq!(find("foo").entity_type, "function");
1579        assert_eq!(find("bar").entity_type, "function");
1580        assert_eq!(find("items").entity_type, "variable");
1581
1582        // Locals inside function expressions suppressed
1583        assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
1584        assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
1585        assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
1586
1587        // Named function expression used as callback argument not extracted
1588        assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
1589    }
1590
1591    #[test]
1592    fn test_variable_assigned_arrow_extracts_inner_entities() {
1593        // Arrow function assigned to a variable: inner class/function
1594        // declarations should be extracted, locals should be suppressed.
1595        let code = r#"
1596const handler = () => {
1597  class Inner {
1598    run() { return 1; }
1599  }
1600
1601  function make() {
1602    return new Inner();
1603  }
1604
1605  const local = 42;
1606};
1607"#;
1608        let plugin = CodeParserPlugin;
1609        let entities = plugin.extract_entities(code, "assigned.ts");
1610        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1611        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1612
1613        assert_eq!(handler.entity_type, "function");
1614        assert!(names.contains(&"handler"), "got: {:?}", names);
1615        assert!(names.contains(&"Inner"), "got: {:?}", names);
1616        assert!(names.contains(&"run"), "got: {:?}", names);
1617        assert!(names.contains(&"make"), "got: {:?}", names);
1618        assert!(!names.contains(&"local"), "got: {:?}", names);
1619    }
1620
1621    #[test]
1622    fn test_variable_assigned_function_expression_extracts_inner_entities() {
1623        // Function expression assigned to a variable: same behavior.
1624        let code = r#"
1625const handler = function() {
1626  class Inner {}
1627  function make() { return new Inner(); }
1628  const local = 42;
1629};
1630"#;
1631        let plugin = CodeParserPlugin;
1632        let entities = plugin.extract_entities(code, "funexpr-inner.ts");
1633        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1634        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1635
1636        assert_eq!(handler.entity_type, "function");
1637        assert!(names.contains(&"handler"), "got: {:?}", names);
1638        assert!(names.contains(&"Inner"), "got: {:?}", names);
1639        assert!(names.contains(&"make"), "got: {:?}", names);
1640        assert!(!names.contains(&"local"), "got: {:?}", names);
1641    }
1642
1643    #[test]
1644    fn test_let_assigned_arrow_stays_variable_typescript() {
1645        let code = r#"
1646let handler = () => {
1647  return 42;
1648};
1649"#;
1650        let plugin = CodeParserPlugin;
1651        let entities = plugin.extract_entities(code, "let-assigned.ts");
1652        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1653
1654        assert_eq!(handler.entity_type, "variable");
1655    }
1656
1657    #[test]
1658    fn test_const_assigned_arrow_promoted_to_function_javascript() {
1659        let code = r#"
1660const handler = () => {
1661  return 42;
1662};
1663"#;
1664        let plugin = CodeParserPlugin;
1665        let entities = plugin.extract_entities(code, "handler.js");
1666        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1667
1668        assert_eq!(handler.entity_type, "function");
1669    }
1670
1671    #[test]
1672    fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
1673        let code = r#"
1674const value = 1, handler = () => value;
1675const first = () => 1, second = 2;
1676"#;
1677        let plugin = CodeParserPlugin;
1678        let entities = plugin.extract_entities(code, "sample.ts");
1679        let find = |name: &str| {
1680            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1681                panic!(
1682                    "missing {name}; got: {:?}",
1683                    entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
1684                )
1685            })
1686        };
1687
1688        assert_eq!(find("value").entity_type, "variable");
1689        assert_eq!(find("handler").entity_type, "function");
1690        assert_eq!(find("first").entity_type, "function");
1691        assert_eq!(find("second").entity_type, "variable");
1692    }
1693
1694    #[test]
1695    fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
1696        let code = r#"
1697function wrapper() {
1698  const holder = class {
1699    run() { return 1; }
1700  }, handler = () => {
1701    class Inner {
1702      go() { return 2; }
1703    }
1704  }, value = 1;
1705}
1706"#;
1707        let plugin = CodeParserPlugin;
1708        let entities = plugin.extract_entities(code, "sample.ts");
1709        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1710        let find = |name: &str| {
1711            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1712                panic!(
1713                    "missing {name}; got: {:?}",
1714                    entities
1715                        .iter()
1716                        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1717                        .collect::<Vec<_>>()
1718                )
1719            })
1720        };
1721
1722        assert_eq!(find("wrapper").entity_type, "function");
1723        assert_eq!(find("handler").entity_type, "function");
1724        assert!(names.contains(&"run"), "got: {:?}", names);
1725        assert!(names.contains(&"Inner"), "got: {:?}", names);
1726        assert!(names.contains(&"go"), "got: {:?}", names);
1727        assert!(!names.contains(&"holder"), "got: {:?}", names);
1728        assert!(!names.contains(&"value"), "got: {:?}", names);
1729    }
1730
1731    #[test]
1732    fn test_go_var_declaration() {
1733        let code = r#"package featuremgmt
1734
1735type FeatureFlag struct {
1736	Name        string
1737	Description string
1738	Stage       string
1739}
1740
1741var standardFeatureFlags = []FeatureFlag{
1742	{
1743		Name:        "panelTitleSearch",
1744		Description: "Search for dashboards using panel title",
1745		Stage:       "PublicPreview",
1746	},
1747}
1748
1749func GetFlags() []FeatureFlag {
1750	return standardFeatureFlags
1751}
1752"#;
1753        let plugin = CodeParserPlugin;
1754        let entities = plugin.extract_entities(code, "flags.go");
1755        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1756        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1757        eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1758
1759        assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
1760        assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
1761        assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
1762    }
1763
1764    #[test]
1765    fn test_go_grouped_var_declaration() {
1766        let code = r#"package test
1767
1768var (
1769	simple = 42
1770	flags = []string{"a", "b"}
1771)
1772
1773const (
1774	x = 1
1775	y = 2
1776)
1777
1778func main() {}
1779"#;
1780        let plugin = CodeParserPlugin;
1781        let entities = plugin.extract_entities(code, "test.go");
1782        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1783        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1784        eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1785
1786        assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1787        assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1788        assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1789    }
1790
1791    #[test]
1792    fn test_dart_entity_extraction() {
1793        let code = r#"
1794import 'dart:math';
1795
1796class Calculator {
1797  final String name;
1798
1799  Calculator(this.name);
1800
1801  Calculator.withDefault() : name = 'default';
1802
1803  factory Calculator.create(String name) {
1804    return Calculator(name);
1805  }
1806
1807  int add(int a, int b) {
1808    return a + b;
1809  }
1810
1811  int get doubleAdd => add(1, 1) * 2;
1812
1813  set label(String value) {
1814    // no-op
1815  }
1816
1817  int operator +(Calculator other) {
1818    return 0;
1819  }
1820}
1821
1822mixin Loggable {
1823  void log(String message) {
1824    print(message);
1825  }
1826}
1827
1828extension StringExt on String {
1829  bool get isBlank => trim().isEmpty;
1830}
1831
1832enum Status {
1833  active,
1834  inactive;
1835
1836  String display() => name.toUpperCase();
1837}
1838
1839typedef Callback = void Function(int);
1840
1841int add(int a, int b) {
1842  return a + b;
1843}
1844
1845extension type Wrapper(int value) implements int {}
1846"#;
1847        let plugin = CodeParserPlugin;
1848        let entities = plugin.extract_entities(code, "calculator.dart");
1849        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1850        eprintln!(
1851            "Dart entities: {:?}",
1852            entities
1853                .iter()
1854                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1855                .collect::<Vec<_>>()
1856        );
1857
1858        // Top-level declarations
1859        assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
1860        assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
1861        assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
1862        assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
1863        assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
1864        assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
1865        assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
1866
1867        // Class members with correct types
1868        let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
1869        assert!(add_method.is_some(), "Should find add method inside Calculator");
1870        assert_eq!(add_method.unwrap().entity_type, "method");
1871
1872        // Named constructor gets distinct name from unnamed constructor
1873        let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
1874        assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
1875        let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
1876        assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
1877        assert_eq!(named_ctor.unwrap().entity_type, "constructor");
1878        assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
1879
1880        // Factory constructor
1881        let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
1882        assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
1883        assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
1884
1885        // Getter, setter, operator
1886        let getter = entities.iter().find(|e| e.name == "doubleAdd");
1887        assert!(getter.is_some(), "Should find getter doubleAdd");
1888        assert_eq!(getter.unwrap().entity_type, "getter");
1889
1890        let setter = entities.iter().find(|e| e.name == "label");
1891        assert!(setter.is_some(), "Should find setter label");
1892        assert_eq!(setter.unwrap().entity_type, "setter");
1893
1894        let operator = entities.iter().find(|e| e.name == "operator +");
1895        assert!(operator.is_some(), "Should find operator +");
1896        assert_eq!(operator.unwrap().entity_type, "method");
1897
1898        // Mixin members have parent
1899        let log_method = entities.iter().find(|e| e.name == "log");
1900        assert!(log_method.is_some(), "Should find log in Loggable");
1901        assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
1902
1903        // Entity type mapping
1904        let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
1905        assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
1906
1907        let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
1908        assert_eq!(loggable.entity_type, "mixin");
1909
1910        let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
1911        assert_eq!(ext.entity_type, "extension");
1912
1913        let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
1914        assert_eq!(wrapper.entity_type, "extension");
1915    }
1916
1917    #[test]
1918    fn test_dart_top_level_function_includes_body() {
1919        let code = r#"
1920int add(int a, int b) {
1921  return a + b;
1922}
1923
1924String greet(String name) => 'Hello, $name!';
1925"#;
1926        let plugin = CodeParserPlugin;
1927        let entities = plugin.extract_entities(code, "funcs.dart");
1928        eprintln!(
1929            "Dart top-level: {:?}",
1930            entities
1931                .iter()
1932                .map(|e| (&e.name, &e.entity_type, &e.content))
1933                .collect::<Vec<_>>()
1934        );
1935
1936        let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
1937        assert!(
1938            add_fn.content.contains("return a + b"),
1939            "Top-level function content should include the body, got: {:?}",
1940            add_fn.content
1941        );
1942
1943        let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
1944        assert!(
1945            greet_fn.content.contains("Hello"),
1946            "Expression body should be included, got: {:?}",
1947            greet_fn.content
1948        );
1949
1950        // Body changes should produce different content_hash
1951        let code_v2 = r#"
1952int add(int a, int b) {
1953  return a * b;
1954}
1955
1956String greet(String name) => 'Hello, $name!';
1957"#;
1958        let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
1959        let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
1960        assert_ne!(
1961            add_fn.content_hash, add_v2.content_hash,
1962            "Body change should produce different content_hash"
1963        );
1964
1965        // Unchanged function should keep the same hash
1966        let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
1967        assert_eq!(
1968            greet_fn.content_hash, greet_v2.content_hash,
1969            "Unchanged function should keep the same content_hash"
1970        );
1971    }
1972
1973    #[test]
1974    fn test_dart_renamed_named_constructor_same_structural_hash() {
1975        let code_a = r#"
1976class Foo {
1977  Foo.fromJson(Map<String, dynamic> json) {
1978    print(json);
1979  }
1980}
1981"#;
1982        let code_b = r#"
1983class Foo {
1984  Foo.fromMap(Map<String, dynamic> json) {
1985    print(json);
1986  }
1987}
1988"#;
1989        let plugin = CodeParserPlugin;
1990        let entities_a = plugin.extract_entities(code_a, "a.dart");
1991        let entities_b = plugin.extract_entities(code_b, "b.dart");
1992
1993        let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
1994        let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
1995
1996        assert_eq!(
1997            ctor_a.structural_hash, ctor_b.structural_hash,
1998            "Renamed named constructor with identical body should have same structural_hash"
1999        );
2000        assert_ne!(
2001            ctor_a.content_hash, ctor_b.content_hash,
2002            "Content hash should differ since raw content includes the name"
2003        );
2004    }
2005
2006    #[test]
2007    fn test_dart_top_level_getter_setter() {
2008        let code = r#"
2009int _value = 0;
2010
2011int get currentValue {
2012  return _value;
2013}
2014
2015set currentValue(int v) {
2016  _value = v;
2017}
2018"#;
2019        let plugin = CodeParserPlugin;
2020        let entities = plugin.extract_entities(code, "accessors.dart");
2021        eprintln!(
2022            "Dart top-level accessors: {:?}",
2023            entities
2024                .iter()
2025                .map(|e| (&e.name, &e.entity_type, &e.content))
2026                .collect::<Vec<_>>()
2027        );
2028
2029        let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
2030        assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
2031            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2032        assert!(
2033            getter.unwrap().content.contains("return _value"),
2034            "Top-level getter content should include the body"
2035        );
2036        assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
2037
2038        // tree-sitter-dart 0.2.0 parses top-level setters as function_signature
2039        // (treating `set` as a type_identifier). setter_signature is only
2040        // produced inside class_member → method_signature.
2041        let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
2042        assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
2043            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2044        assert!(
2045            setter.unwrap().content.contains("_value = v"),
2046            "Top-level setter content should include the body"
2047        );
2048    }
2049
2050    #[test]
2051    fn test_dart_field_entity_type() {
2052        let code = r#"
2053class Config {
2054  final String name;
2055  static const int maxRetries = 3;
2056}
2057"#;
2058        let plugin = CodeParserPlugin;
2059        let entities = plugin.extract_entities(code, "config.dart");
2060        eprintln!(
2061            "Dart fields: {:?}",
2062            entities
2063                .iter()
2064                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2065                .collect::<Vec<_>>()
2066        );
2067
2068        let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
2069        assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
2070            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2071        assert_eq!(name_field.unwrap().entity_type, "field");
2072
2073        let max_retries = entities.iter().find(|e| e.name == "maxRetries");
2074        assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
2075            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2076        assert_eq!(max_retries.unwrap().entity_type, "field");
2077    }
2078
2079    #[test]
2080    fn test_dart_identifier_list_fields() {
2081        // identifier_list produces bare identifier children (no "name" field),
2082        // unlike initialized_identifier_list which wraps each in an
2083        // initialized_identifier node with a "name" field.
2084        let code = r#"
2085abstract class Shape {
2086  abstract double x, y;
2087  abstract String label;
2088}
2089"#;
2090        let plugin = CodeParserPlugin;
2091        let entities = plugin.extract_entities(code, "shape.dart");
2092        eprintln!(
2093            "Dart identifier_list fields: {:?}",
2094            entities
2095                .iter()
2096                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2097                .collect::<Vec<_>>()
2098        );
2099
2100        let x_field = entities.iter().find(|e| e.name == "x");
2101        assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
2102            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2103        assert_eq!(x_field.unwrap().entity_type, "field");
2104        assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
2105
2106        let label_field = entities.iter().find(|e| e.name == "label");
2107        assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
2108            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2109        assert_eq!(label_field.unwrap().entity_type, "field");
2110    }
2111
2112    #[test]
2113    fn test_ocaml_entity_extraction() {
2114        let code = r#"
2115type color = Red | Green | Blue
2116
2117type point = {
2118  x : float;
2119  y : float;
2120}
2121
2122exception Not_found of string
2123
2124let greet name =
2125  Printf.printf "Hello, %s!\n" name
2126
2127let add a b = a + b
2128
2129let version = "1.0"
2130
2131let color_to_string = function
2132  | Red -> "red"
2133  | Blue -> "blue"
2134
2135let inc = fun x -> x + 1
2136
2137module MyModule = struct
2138  let helper x = x * 2
2139end
2140
2141module type Printable = sig
2142  val to_string : 'a -> string
2143end
2144
2145external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
2146
2147class point_class x_init = object
2148  val mutable x = x_init
2149  method get_x = x
2150end
2151
2152class type measurable = object
2153  method measure : float
2154end
2155"#;
2156        let plugin = CodeParserPlugin;
2157        let entities = plugin.extract_entities(code, "example.ml");
2158        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2159        eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2160
2161        let find = |name: &str| entities.iter().find(|e| e.name == name)
2162            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2163
2164        assert_eq!(find("color").entity_type, "type");
2165        assert_eq!(find("point").entity_type, "type");
2166        assert_eq!(find("Not_found").entity_type, "exception");
2167        assert_eq!(find("greet").entity_type, "function");
2168        assert_eq!(find("add").entity_type, "function");
2169        assert_eq!(find("version").entity_type, "value");
2170        assert_eq!(find("color_to_string").entity_type, "function");
2171        assert_eq!(find("inc").entity_type, "function");
2172        assert_eq!(find("MyModule").entity_type, "module");
2173        assert_eq!(find("Printable").entity_type, "module_type");
2174        assert_eq!(find("caml_input").entity_type, "external");
2175        assert_eq!(find("point_class").entity_type, "class");
2176        assert_eq!(find("measurable").entity_type, "class_type");
2177    }
2178
2179    #[test]
2180    fn test_ocaml_nested_module_entities() {
2181        let code = r#"
2182module Outer = struct
2183  let x = 42
2184
2185  module Inner = struct
2186    let y = 0
2187  end
2188end
2189"#;
2190        let plugin = CodeParserPlugin;
2191        let entities = plugin.extract_entities(code, "nested.ml");
2192        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2193        eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2194
2195        let find = |name: &str| entities.iter().find(|e| e.name == name)
2196            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2197
2198        let outer = find("Outer");
2199        let x = find("x");
2200        let inner = find("Inner");
2201        let y = find("y");
2202
2203        assert_eq!(outer.entity_type, "module");
2204        assert_eq!(x.entity_type, "value");
2205        assert_eq!(inner.entity_type, "module");
2206        assert_eq!(y.entity_type, "value");
2207
2208        assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
2209        assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
2210        assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
2211    }
2212
2213    #[test]
2214    fn test_ocaml_interface_entity_extraction() {
2215        let code = r#"
2216type t
2217
2218val create : string -> t
2219val to_string : t -> string
2220
2221exception Invalid_input of string
2222
2223module type Serializable = sig
2224  val serialize : t -> string
2225end
2226"#;
2227        let plugin = CodeParserPlugin;
2228        let entities = plugin.extract_entities(code, "example.mli");
2229        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2230        eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2231
2232        let find = |name: &str| entities.iter().find(|e| e.name == name)
2233            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2234
2235        assert_eq!(find("t").entity_type, "type");
2236        assert_eq!(find("create").entity_type, "val");
2237        assert_eq!(find("to_string").entity_type, "val");
2238        assert_eq!(find("Invalid_input").entity_type, "exception");
2239        assert_eq!(find("Serializable").entity_type, "module_type");
2240    }
2241
2242    #[test]
2243    fn test_ocaml_mutual_recursion_let() {
2244        let code = r#"
2245let rec even n = (n = 0) || odd (n - 1)
2246and odd n = (n <> 0) && even (n - 1)
2247
2248let rec ping x = pong (x - 1)
2249and pong x = if x <= 0 then 0 else ping (x - 1)
2250"#;
2251        let plugin = CodeParserPlugin;
2252        let entities = plugin.extract_entities(code, "mutual.ml");
2253        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2254        eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2255
2256        let find = |name: &str| entities.iter().find(|e| e.name == name)
2257            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2258
2259        assert_eq!(find("even").entity_type, "function");
2260        assert_eq!(find("odd").entity_type, "function");
2261        assert_eq!(find("ping").entity_type, "function");
2262        assert_eq!(find("pong").entity_type, "function");
2263    }
2264
2265    #[test]
2266    fn test_ocaml_mutual_recursion_module() {
2267        let code = r#"
2268module rec A : sig val x : int end = struct
2269  let x = B.y + 1
2270end
2271and B : sig val y : int end = struct
2272  let y = 0
2273end
2274"#;
2275        let plugin = CodeParserPlugin;
2276        let entities = plugin.extract_entities(code, "mutual_mod.ml");
2277        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2278        eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2279
2280        let find = |name: &str| entities.iter().find(|e| e.name == name)
2281            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2282
2283        let a = find("A");
2284        let b = find("B");
2285        assert_eq!(a.entity_type, "module");
2286        assert_eq!(b.entity_type, "module");
2287
2288        let x = find("x");
2289        let y = find("y");
2290        assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
2291        assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
2292    }
2293
2294    #[test]
2295    fn test_ocaml_destructured_let() {
2296        let code = r#"
2297let (a, b) = (1, 2)
2298
2299let { x; y } = point
2300
2301let simple = 42
2302"#;
2303        let plugin = CodeParserPlugin;
2304        let entities = plugin.extract_entities(code, "destruct.ml");
2305        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2306        eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2307
2308        let find = |name: &str| entities.iter().find(|e| e.name == name)
2309            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2310
2311        assert_eq!(find("a").entity_type, "value");
2312        assert_eq!(find("b").entity_type, "value");
2313        assert_eq!(find("x").entity_type, "value");
2314        assert_eq!(find("y").entity_type, "value");
2315        assert_eq!(find("simple").entity_type, "value");
2316    }
2317
2318    #[test]
2319    fn test_ocaml_mutual_recursion_class() {
2320        let code = r#"
2321class foo = object
2322  method x = 1
2323end
2324and bar = object
2325  method y = 2
2326end
2327"#;
2328        let plugin = CodeParserPlugin;
2329        let entities = plugin.extract_entities(code, "classes.ml");
2330        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2331        eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2332
2333        let find = |name: &str| entities.iter().find(|e| e.name == name)
2334            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2335
2336        assert_eq!(find("foo").entity_type, "class");
2337        assert_eq!(find("bar").entity_type, "class");
2338    }
2339
2340    #[test]
2341    fn test_perl_entity_extraction() {
2342        let code = r#"package Foo::Bar;
2343
2344use strict;
2345use warnings;
2346
2347sub hello {
2348    my ($self, $name) = @_;
2349    print "Hello, $name!\n";
2350}
2351
2352sub _private_helper {
2353    return 42;
2354}
2355
23561;
2357"#;
2358        let plugin = CodeParserPlugin;
2359        let entities = plugin.extract_entities(code, "Foo/Bar.pm");
2360        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2361
2362        assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
2363        assert!(names.contains(&"hello"), "got: {:?}", names);
2364        assert!(names.contains(&"_private_helper"), "got: {:?}", names);
2365
2366        let find = |name: &str| entities.iter().find(|e| e.name == name)
2367            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2368
2369        assert_eq!(find("Foo::Bar").entity_type, "package");
2370        assert_eq!(find("hello").entity_type, "function");
2371        assert_eq!(find("_private_helper").entity_type, "function");
2372    }
2373
2374    #[test]
2375    fn test_fortran_entity_extraction() {
2376        let code = r#"module math_utils
2377  implicit none
2378contains
2379  function add(a, b) result(c)
2380    integer, intent(in) :: a, b
2381    integer :: c
2382    c = a + b
2383  end function add
2384
2385  subroutine greet()
2386    print *, "hello"
2387  end subroutine greet
2388end module math_utils
2389
2390program main
2391  implicit none
2392  print *, "hello"
2393end program main
2394"#;
2395        let plugin = CodeParserPlugin;
2396        let entities = plugin.extract_entities(code, "test.f90");
2397        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2398
2399        assert!(names.contains(&"math_utils"), "got: {:?}", names);
2400        assert!(names.contains(&"add"), "got: {:?}", names);
2401        assert!(names.contains(&"greet"), "got: {:?}", names);
2402        assert!(names.contains(&"main"), "got: {:?}", names);
2403
2404        let find = |name: &str| entities.iter().find(|e| e.name == name)
2405            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2406
2407        assert_eq!(find("math_utils").entity_type, "module");
2408        assert_eq!(find("add").entity_type, "function");
2409        assert_eq!(find("greet").entity_type, "subroutine");
2410        assert_eq!(find("main").entity_type, "program");
2411
2412        // Nested entities have parent
2413        assert!(find("add").parent_id.is_some());
2414        assert!(find("greet").parent_id.is_some());
2415    }
2416
2417    #[test]
2418    fn test_scala_entity_extraction() {
2419        let code = r#"
2420package com.example
2421
2422import scala.collection.mutable
2423
2424class UserService(val name: String) {
2425  def getUsers(): List[User] = db.findAll()
2426
2427  def createUser(user: User): Unit = db.save(user)
2428
2429  private def validate(user: User): Boolean = true
2430}
2431
2432object UserService {
2433  def apply(name: String): UserService = new UserService(name)
2434
2435  val DefaultName: String = "default"
2436}
2437
2438trait Repository[T] {
2439  def findById(id: String): Option[T]
2440  def findAll(): List[T]
2441}
2442
2443case class User(id: String, name: String)
2444
2445type UserId = String
2446"#;
2447        let plugin = CodeParserPlugin;
2448        let entities = plugin.extract_entities(code, "UserService.scala");
2449        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2450        eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2451
2452        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
2453        assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
2454        assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
2455        assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
2456
2457        // Methods should be nested under class
2458        let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
2459        assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
2460    }
2461
2462    #[test]
2463    fn test_scala3_entity_extraction() {
2464        let code = r#"
2465package com.example
2466
2467enum Color:
2468  case Red, Green, Blue
2469
2470enum Planet(mass: Double, radius: Double):
2471  case Mercury extends Planet(3.303e+23, 2.4397e6)
2472  case Venus   extends Planet(4.869e+24, 6.0518e6)
2473
2474object Main:
2475  def main(args: Array[String]): Unit =
2476    println("Hello, World!")
2477
2478trait Greeter:
2479  def greet(name: String): String
2480
2481given Greeter with
2482  def greet(name: String): String = s"Hello, $name!"
2483
2484extension (s: String)
2485  def shout: String = s.toUpperCase + "!"
2486
2487type Predicate[A] = A => Boolean
2488"#;
2489        let plugin = CodeParserPlugin;
2490        let entities = plugin.extract_entities(code, "Main.scala");
2491        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2492        eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2493
2494        assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
2495        assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
2496        assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
2497        assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
2498        assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
2499    }
2500
2501    #[test]
2502    fn test_zig_entity_extraction() {
2503        let code = r#"
2504const std = @import("std");
2505
2506pub const Point = struct {
2507    x: i32,
2508    y: i32,
2509};
2510
2511pub const Color = enum {
2512    red,
2513    green,
2514    blue,
2515};
2516
2517const Person = struct {
2518    name: []const u8,
2519    age: u32,
2520};
2521
2522pub fn greet(name: []const u8) void {
2523    std.debug.print("Hello, {s}!\n", .{name});
2524}
2525
2526fn add(a: i32, b: i32) i32 {
2527    return a + b;
2528}
2529
2530pub fn main() !void {
2531    greet("world");
2532}
2533
2534test "basic addition" {
2535    const result = add(2, 3);
2536    _ = result;
2537}
2538"#;
2539        let plugin = CodeParserPlugin;
2540        let entities = plugin.extract_entities(code, "main.zig");
2541        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2542        let types: std::collections::HashMap<&str, &str> = entities
2543            .iter()
2544            .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2545            .collect();
2546
2547        assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
2548        assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
2549        assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
2550        assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
2551        assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
2552        assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
2553
2554        assert_eq!(types["greet"], "function");
2555        assert_eq!(types["add"], "function");
2556        assert_eq!(types["Point"], "struct");
2557        assert_eq!(types["Color"], "enum");
2558        assert_eq!(types["Person"], "struct");
2559    }
2560}