// sem_core/parser/plugins/code/mod.rs

1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use crate::utils::hash::{content_hash, structural_hash};
10use languages::{get_all_code_extensions, get_language_config};
11use entity_extractor::extract_entities;
12
13pub struct CodeParserPlugin;
14
15// Thread-local parser cache: one Parser per language per thread.
16// Avoids creating a new Parser for every file during parallel graph builds.
17thread_local! {
18    static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
19}
20
21fn language_config_for_content(
22    content: &str,
23    file_path: &str,
24) -> Option<&'static languages::LanguageConfig> {
25    let ext = std::path::Path::new(file_path)
26        .extension()
27        .and_then(|e| e.to_str())
28        .map(|e| format!(".{}", e.to_lowercase()))
29        .unwrap_or_default();
30
31    get_language_config(&ext).or_else(|| {
32        detect_ext_from_content(content).and_then(|shebang_ext| get_language_config(&shebang_ext))
33    })
34}
35
36fn parse_tree(
37    config: &'static languages::LanguageConfig,
38    content: &str,
39) -> Option<tree_sitter::Tree> {
40    let language = (config.get_language)()?;
41
42    PARSER_CACHE.with(|cache| {
43        let mut cache = cache.borrow_mut();
44        let parser = cache.entry(config.id).or_insert_with(|| {
45            let mut p = tree_sitter::Parser::new();
46            let _ = p.set_language(&language);
47            p
48        });
49
50        parser.parse(content.as_bytes(), None)
51    })
52}
53
54fn has_non_comment_content(node: tree_sitter::Node, source: &[u8]) -> bool {
55    let mut worklist = Vec::new();
56    let mut cursor = node.walk();
57    worklist.extend(node.children(&mut cursor));
58
59    while let Some(node) = worklist.pop() {
60        if is_comment_node(node.kind()) {
61            continue;
62        }
63
64        if node.child_count() == 0 {
65            let start = node.start_byte();
66            let end = node.end_byte();
67            if start < end
68                && end <= source.len()
69                && source[start..end].iter().any(|b| !b.is_ascii_whitespace())
70            {
71                return true;
72            }
73            continue;
74        }
75
76        let mut cursor = node.walk();
77        worklist.extend(node.children(&mut cursor));
78    }
79
80    false
81}
82
/// Returns `true` when `kind` is one of the node kinds treated as a comment.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 5] = [
        "comment",
        "line_comment",
        "block_comment",
        "doc_comment",
        "tag_comment",
    ];

    COMMENT_KINDS.contains(&kind)
}
89
/// Returns the text following a leading `#!` up to the end of the first line,
/// or `None` when `content` does not start with `#!`.
///
/// A bare `#!` with nothing after it yields `Some("")`.
fn shebang_line(content: &str) -> Option<&str> {
    let after_marker = content.strip_prefix("#!")?;
    Some(after_marker.lines().next().unwrap_or(""))
}
95
96impl SemanticParserPlugin for CodeParserPlugin {
97    fn id(&self) -> &str {
98        "code"
99    }
100
101    fn extensions(&self) -> &[&str] {
102        get_all_code_extensions()
103    }
104
105    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
106        self.extract_entities_with_tree(content, file_path).0
107    }
108
109    fn extract_entities_with_tree(
110        &self,
111        content: &str,
112        file_path: &str,
113    ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
114        let Some(config) = language_config_for_content(content, file_path) else {
115            return (Vec::new(), None);
116        };
117
118        let Some(tree) = parse_tree(config, content) else {
119            return (Vec::new(), None);
120        };
121
122        let entities = extract_entities(&tree, file_path, config, content);
123        (entities, Some(tree))
124    }
125
126    fn structural_hash_content(&self, content: &str, file_path: &str) -> Option<String> {
127        let config = language_config_for_content(content, file_path)?;
128        let tree = parse_tree(config, content)?;
129        let shebang = shebang_line(content);
130        if shebang.is_none() && !has_non_comment_content(tree.root_node(), content.as_bytes()) {
131            return Some(String::new());
132        }
133        let structural = structural_hash(tree.root_node(), content.as_bytes());
134        match shebang {
135            Some(shebang) => Some(content_hash(&format!("shebang:{shebang}\n{structural}"))),
136            None => Some(structural),
137        }
138    }
139}
140
141use crate::parser::registry::detect_ext_from_content;
142
143#[cfg(test)]
144mod tests {
145    use super::*;
146
147    #[test]
148    fn test_java_entity_extraction() {
149        let code = r#"
150package com.example;
151
152import java.util.List;
153
154public class UserService {
155    private String name;
156
157    public UserService(String name) {
158        this.name = name;
159    }
160
161    public List<User> getUsers() {
162        return db.findAll();
163    }
164
165    public void createUser(User user) {
166        db.save(user);
167    }
168}
169
170interface Repository<T> {
171    T findById(String id);
172    List<T> findAll();
173}
174
175enum Status {
176    ACTIVE,
177    INACTIVE,
178    DELETED
179}
180"#;
181        let plugin = CodeParserPlugin;
182        let entities = plugin.extract_entities(code, "UserService.java");
183        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
184        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
185        eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
186
187        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
188        assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
189        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
190
191        // A field is named by its declarator, not its type: `private String name;`
192        // is the field `name`, not `String`.
193        let field = entities
194            .iter()
195            .find(|e| e.entity_type == "field")
196            .expect("should extract the field entity");
197        assert_eq!(field.name, "name", "field should be named by its declarator, got: {:?}", field.name);
198    }
199
200    #[test]
201    fn test_java_nested_methods() {
202        let code = r#"
203public class Calculator {
204    public int add(int a, int b) {
205        return a + b;
206    }
207
208    public int subtract(int a, int b) {
209        return a - b;
210    }
211}
212"#;
213        let plugin = CodeParserPlugin;
214        let entities = plugin.extract_entities(code, "Calculator.java");
215        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
216        eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
217
218        assert!(names.contains(&"Calculator"), "Should find Calculator class");
219        assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
220        assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
221
222        // Methods should have Calculator as parent
223        let add = entities.iter().find(|e| e.name == "add").unwrap();
224        assert!(add.parent_id.is_some(), "add should have parent_id");
225    }
226
227    #[test]
228    fn test_c_entity_extraction() {
229        let code = r#"
230#include <stdio.h>
231
232struct Point {
233    int x;
234    int y;
235};
236
237enum Color {
238    RED,
239    GREEN,
240    BLUE
241};
242
243typedef struct {
244    char name[50];
245    int age;
246} Person;
247
248void greet(const char* name) {
249    printf("Hello, %s!\n", name);
250}
251
252int add(int a, int b) {
253    return a + b;
254}
255
256int main() {
257    greet("world");
258    return 0;
259}
260"#;
261        let plugin = CodeParserPlugin;
262        let entities = plugin.extract_entities(code, "main.c");
263        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
264        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
265        eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
266
267        assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
268        assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
269        assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
270        assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
271        assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
272    }
273
274    #[test]
275    fn test_c_function_locals_not_extracted() {
276        let code = r#"
277int global_count = 0;
278int helper(void);
279
280int main(void) {
281    int local = helper();
282    const char *message = "hello";
283    return local + global_count;
284}
285"#;
286        let plugin = CodeParserPlugin;
287        let entities = plugin.extract_entities(code, "main.c");
288        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
289
290        assert!(names.contains(&"global_count"), "got: {:?}", names);
291        assert!(names.contains(&"helper"), "got: {:?}", names);
292        assert!(names.contains(&"main"), "got: {:?}", names);
293        assert!(!names.contains(&"local"), "got: {:?}", names);
294        assert!(!names.contains(&"message"), "got: {:?}", names);
295    }
296
297    #[test]
298    fn test_cpp_entity_extraction() {
299        let code = "namespace math {\nclass Vector3 {\npublic:\n    float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
300        let plugin = CodeParserPlugin;
301        let entities = plugin.extract_entities(code, "main.cpp");
302        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
303        assert!(names.contains(&"math"), "got: {:?}", names);
304        assert!(names.contains(&"Vector3"), "got: {:?}", names);
305        assert!(names.contains(&"greet"), "got: {:?}", names);
306    }
307
308    #[test]
309    fn test_cpp_function_locals_not_extracted() {
310        let code = r#"
311int global_value = 1;
312int helper();
313
314int main() {
315    int local = helper();
316    auto lambda = []() {
317        int lambda_local = 3;
318        return lambda_local;
319    };
320    return local + lambda();
321}
322"#;
323        let plugin = CodeParserPlugin;
324        let entities = plugin.extract_entities(code, "main.cpp");
325        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
326
327        assert!(names.contains(&"global_value"), "got: {:?}", names);
328        assert!(names.contains(&"helper"), "got: {:?}", names);
329        assert!(names.contains(&"main"), "got: {:?}", names);
330        assert!(!names.contains(&"local"), "got: {:?}", names);
331        assert!(!names.contains(&"lambda"), "got: {:?}", names);
332        assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
333    }
334
335    #[test]
336    fn test_ruby_entity_extraction() {
337        let code = "module Auth\n  class User\n    def greet\n      \"hi\"\n    end\n  end\nend\ndef helper(x)\n  x * 2\nend\n";
338        let plugin = CodeParserPlugin;
339        let entities = plugin.extract_entities(code, "auth.rb");
340        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
341        assert!(names.contains(&"Auth"), "got: {:?}", names);
342        assert!(names.contains(&"User"), "got: {:?}", names);
343        assert!(names.contains(&"helper"), "got: {:?}", names);
344    }
345
346    #[test]
347    fn test_csharp_entity_extraction() {
348        let code = "namespace MyApp {\npublic class User {\n    public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
349        let plugin = CodeParserPlugin;
350        let entities = plugin.extract_entities(code, "Models.cs");
351        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
352        assert!(names.contains(&"MyApp"), "got: {:?}", names);
353        assert!(names.contains(&"User"), "got: {:?}", names);
354        assert!(names.contains(&"Role"), "got: {:?}", names);
355    }
356
357    #[test]
358    fn test_swift_entity_extraction() {
359        let code = r#"
360import Foundation
361
362typealias Handler = (Int) -> Void
363
364prefix operator ~~~
365
366class UserService {
367    var name: String
368
369    init(name: String) {
370        self.name = name
371    }
372
373    deinit {
374        print("freed")
375    }
376
377    func getUsers() -> [User] {
378        return db.findAll()
379    }
380}
381
382struct Point {
383    var x: Double
384    var y: Double
385
386    subscript(index: Int) -> Double {
387        return x + y + Double(index)
388    }
389}
390
391enum Status {
392    case active
393    case inactive
394    case deleted
395}
396
397protocol Repository {
398    associatedtype Canvas
399    func findById(id: String) -> Canvas?
400    func findAll() -> [Canvas]
401}
402
403func helper(x: Int) -> Int {
404    return x * 2
405}
406"#;
407        let plugin = CodeParserPlugin;
408        let entities = plugin.extract_entities(code, "UserService.swift");
409        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
410        eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
411
412        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
413        assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
414        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
415        assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
416        assert!(names.contains(&"Canvas"), "Should find associatedtype Canvas, got: {:?}", names);
417        assert!(names.contains(&"Handler"), "Should find typealias Handler, got: {:?}", names);
418        assert!(names.contains(&"~~~"), "Should find custom operator ~~~, got: {:?}", names);
419        assert!(names.contains(&"init"), "Should find initializer init, got: {:?}", names);
420        assert!(names.contains(&"deinit"), "Should find deinitializer deinit, got: {:?}", names);
421        assert!(names.contains(&"subscript"), "Should find subscript, got: {:?}", names);
422        assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
423
424        let handler = entities.iter().find(|e| e.name == "Handler").unwrap();
425        assert_eq!(handler.entity_type, "type");
426        assert!(handler.parent_id.is_none());
427
428        let operator = entities.iter().find(|e| e.name == "~~~").unwrap();
429        assert_eq!(operator.entity_type, "operator");
430        assert!(operator.parent_id.is_none());
431
432        let user_service = entities.iter().find(|e| e.name == "UserService").unwrap();
433        assert_eq!(user_service.entity_type, "class");
434
435        let initializer = entities.iter().find(|e| e.name == "init").unwrap();
436        assert_eq!(initializer.entity_type, "init");
437        assert_eq!(initializer.parent_id.as_deref(), Some(user_service.id.as_str()));
438        assert_eq!(initializer.id, "UserService.swift::class::UserService::init");
439
440        let deinitializer = entities.iter().find(|e| e.name == "deinit").unwrap();
441        assert_eq!(deinitializer.entity_type, "deinit");
442        assert_eq!(deinitializer.parent_id.as_deref(), Some(user_service.id.as_str()));
443        assert_eq!(
444            deinitializer.id,
445            "UserService.swift::class::UserService::deinit"
446        );
447
448        let point = entities.iter().find(|e| e.name == "Point").unwrap();
449        assert_eq!(point.entity_type, "struct");
450
451        let subscript = entities.iter().find(|e| e.name == "subscript").unwrap();
452        assert_eq!(subscript.entity_type, "subscript");
453        assert_eq!(subscript.parent_id.as_deref(), Some(point.id.as_str()));
454        assert_eq!(
455            subscript.id,
456            "UserService.swift::struct::Point::subscript"
457        );
458
459        let status = entities.iter().find(|e| e.name == "Status").unwrap();
460        assert_eq!(status.entity_type, "enum");
461
462        let repository = entities.iter().find(|e| e.name == "Repository").unwrap();
463        assert_eq!(repository.entity_type, "protocol");
464        assert_eq!(repository.id, "UserService.swift::protocol::Repository");
465
466        let canvas = entities.iter().find(|e| e.name == "Canvas").unwrap();
467        assert_eq!(canvas.entity_type, "associatedtype");
468        assert_eq!(canvas.parent_id.as_deref(), Some(repository.id.as_str()));
469        assert_eq!(
470            canvas.id,
471            "UserService.swift::protocol::Repository::Canvas"
472        );
473    }
474
475    #[test]
476    fn test_swift_multi_binding_property_extraction() {
477        let code = r#"
478struct Point {
479    var x, y: Int
480}
481"#;
482        let plugin = CodeParserPlugin;
483        let entities = plugin.extract_entities(code, "Point.swift");
484        let point = entities.iter().find(|e| e.name == "Point").unwrap();
485        let properties: Vec<_> = entities
486            .iter()
487            .filter(|e| e.entity_type == "property")
488            .collect();
489
490        assert_eq!(
491            properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
492            vec!["x", "y"]
493        );
494        assert!(properties
495            .iter()
496            .all(|property| property.parent_id.as_deref() == Some(point.id.as_str())));
497        assert_eq!(properties[0].content, "var x: Int");
498        assert_eq!(properties[1].content, "var y: Int");
499    }
500
501    #[test]
502    fn test_swift_multi_binding_property_content_is_per_binding() {
503        let typed_code = r#"
504struct Types {
505    var x: Int, y: String
506}
507"#;
508        let plugin = CodeParserPlugin;
509        let typed_entities = plugin.extract_entities(typed_code, "Types.swift");
510        let typed_properties: Vec<_> = typed_entities
511            .iter()
512            .filter(|e| e.entity_type == "property")
513            .collect();
514        assert_eq!(typed_properties[0].content, "var x: Int");
515        assert_eq!(typed_properties[1].content, "var y: String");
516
517        let mixed_code = r#"
518struct Mixed {
519    var x, y: Int, z: String
520}
521"#;
522        let mixed_entities = plugin.extract_entities(mixed_code, "Mixed.swift");
523        let mixed_properties: Vec<_> = mixed_entities
524            .iter()
525            .filter(|e| e.entity_type == "property")
526            .collect();
527        assert_eq!(mixed_properties[0].content, "var x: Int");
528        assert_eq!(mixed_properties[1].content, "var y: Int");
529        assert_eq!(mixed_properties[2].content, "var z: String");
530
531        let generic_code = r#"
532struct GenericTypes {
533    var lookup: Dictionary<String, Int>, count: Int
534}
535"#;
536        let generic_entities = plugin.extract_entities(generic_code, "GenericTypes.swift");
537        let generic_properties: Vec<_> = generic_entities
538            .iter()
539            .filter(|e| e.entity_type == "property")
540            .collect();
541        assert_eq!(
542            generic_properties[0].content,
543            "var lookup: Dictionary<String, Int>"
544        );
545        assert_eq!(generic_properties[1].content, "var count: Int");
546
547        let initializer_code = r#"
548struct Initializers {
549    var a = Foo(), b = Bar()
550}
551"#;
552        let initializer_entities = plugin.extract_entities(initializer_code, "Initializers.swift");
553        let initializer_properties: Vec<_> = initializer_entities
554            .iter()
555            .filter(|e| e.entity_type == "property")
556            .collect();
557        assert!(initializer_properties[0].content.contains("Foo()"));
558        assert!(!initializer_properties[0].content.contains("Bar()"));
559        assert!(initializer_properties[1].content.contains("Bar()"));
560        assert!(!initializer_properties[1].content.contains("Foo()"));
561
562        let constants_code = r#"
563struct Constants {
564    let first, second, third: Int
565}
566"#;
567        let constants_entities = plugin.extract_entities(constants_code, "Constants.swift");
568        let constants_properties: Vec<_> = constants_entities
569            .iter()
570            .filter(|e| e.entity_type == "property")
571            .collect();
572        assert_eq!(
573            constants_properties.iter().map(|e| e.name.as_str()).collect::<Vec<_>>(),
574            vec!["first", "second", "third"]
575        );
576        assert_eq!(constants_properties[0].content, "let first: Int");
577        assert_eq!(constants_properties[1].content, "let second: Int");
578        assert_eq!(constants_properties[2].content, "let third: Int");
579
580        let semicolon_code = r#"
581struct Semicolons {
582    var left, right: Int; var next: Int
583}
584"#;
585        let semicolon_entities = plugin.extract_entities(semicolon_code, "Semicolons.swift");
586        let semicolon_properties: Vec<_> = semicolon_entities
587            .iter()
588            .filter(|e| e.entity_type == "property")
589            .collect();
590        assert_eq!(semicolon_properties[0].content, "var left: Int");
591        assert_eq!(semicolon_properties[1].content, "var right: Int");
592        assert_eq!(semicolon_properties[2].content, "var next: Int");
593    }
594
595    #[test]
596    fn test_swift_body_locals_not_extracted_as_properties() {
597        let code = r#"
598class Cache {
599    var stored: Int
600
601    var computed: Int {
602        let computedLocal = stored + 1
603        func computedNested() -> Int {
604            return computedLocal
605        }
606        return computedNested()
607    }
608
609    var explicit: Int {
610        get {
611            let getterLocal = stored
612            func getterNested() -> Int {
613                return getterLocal
614            }
615            return getterNested()
616        }
617    }
618
619    init(seed: Int) {
620        let initial = seed
621        self.stored = initial
622    }
623
624    func value() -> Int {
625        let doubled = stored * 2
626        var offset = doubled + 1
627        func nested() -> Int {
628            let insideNested = offset
629            return insideNested
630        }
631        return nested()
632    }
633
634    subscript(index: Int) -> Int {
635        let shifted = index + stored
636        func subscriptNested() -> Int {
637            return shifted
638        }
639        return subscriptNested()
640    }
641
642    deinit {
643        let closing = stored
644        _ = closing
645    }
646}
647"#;
648        let plugin = CodeParserPlugin;
649        let entities = plugin.extract_entities(code, "Cache.swift");
650        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
651
652        assert!(names.contains(&"Cache"), "got: {:?}", names);
653        assert!(names.contains(&"stored"), "got: {:?}", names);
654        assert!(names.contains(&"computed"), "got: {:?}", names);
655        assert!(names.contains(&"explicit"), "got: {:?}", names);
656        assert!(names.contains(&"init"), "got: {:?}", names);
657        assert!(names.contains(&"value"), "got: {:?}", names);
658        assert!(names.contains(&"computedNested"), "got: {:?}", names);
659        assert!(names.contains(&"getterNested"), "got: {:?}", names);
660        assert!(names.contains(&"nested"), "got: {:?}", names);
661        assert!(names.contains(&"subscriptNested"), "got: {:?}", names);
662        assert!(names.contains(&"subscript"), "got: {:?}", names);
663        assert!(names.contains(&"deinit"), "got: {:?}", names);
664        assert!(!names.contains(&"Int"), "got: {:?}", names);
665
666        for local in [
667            "computedLocal",
668            "getterLocal",
669            "initial",
670            "doubled",
671            "offset",
672            "insideNested",
673            "shifted",
674            "closing",
675        ] {
676            assert!(!names.contains(&local), "{local} should not be an entity. Got: {:?}", names);
677        }
678    }
679
680    #[test]
681    fn test_swift_suppressed_multi_binding_initializers_are_traversed() {
682        let code = r#"
683func outer() {
684    let a = { func innerA() -> Int { 1 } },
685        b = { func innerB() -> Int { 2 } }
686}
687"#;
688        let plugin = CodeParserPlugin;
689        let entities = plugin.extract_entities(code, "Locals.swift");
690        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
691
692        assert!(names.contains(&"outer"), "got: {:?}", names);
693        assert!(names.contains(&"innerA"), "got: {:?}", names);
694        assert!(names.contains(&"innerB"), "got: {:?}", names);
695        assert!(!names.contains(&"a"), "local binding should stay suppressed: {:?}", names);
696        assert!(!names.contains(&"b"), "local binding should stay suppressed: {:?}", names);
697    }
698
699    #[test]
700    fn test_swift_conditional_compilation_inside_struct() {
701        let code = r#"
702import ArgumentParser
703
704public struct TuistCommand: AsyncParsableCommand {
705    public init() {}
706
707    public static var configuration: CommandConfiguration {
708        let comment = "brace in string }"
709        let multiline = """
710        brace in multiline }
711        escaped \"""
712        """
713        /* brace in comment } */
714        CommandConfiguration(commandName: "tuist")
715    }
716
717    #if os(macOS)
718        public static var groupedSubcommands: [ParsableCommand.Type] {
719            [InstallCommand.self]
720        }
721    #else
722        public static var groupedSubcommands: [ParsableCommand.Type] {
723            []
724        }
725    #endif
726
727    public func run() async throws {}
728}
729"#;
730        let plugin = CodeParserPlugin;
731        let entities = plugin.extract_entities(code, "TuistCommand.swift");
732        eprintln!(
733            "Swift conditional entities: {:?}",
734            entities
735                .iter()
736                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
737                .collect::<Vec<_>>()
738        );
739
740        let command = entities
741            .iter()
742            .find(|e| e.name == "TuistCommand")
743            .expect("Should recover TuistCommand struct");
744        assert_eq!(command.entity_type, "struct");
745        assert!(command.parent_id.is_none());
746
747        let renamed_code = code.replace("TuistCommand", "RenamedCommand");
748        let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
749        let renamed_command = renamed_entities
750            .iter()
751            .find(|e| e.name == "RenamedCommand")
752            .expect("Should recover renamed command struct");
753        assert_eq!(command.structural_hash, renamed_command.structural_hash);
754
755        for member in ["init", "configuration", "run"] {
756            let entity = entities
757                .iter()
758                .find(|e| e.name == member)
759                .unwrap_or_else(|| panic!("Should find {member}"));
760            assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
761        }
762
763        let grouped_subcommands: Vec<_> = entities
764            .iter()
765            .filter(|e| e.name == "groupedSubcommands")
766            .collect();
767        assert_eq!(grouped_subcommands.len(), 2);
768        assert!(grouped_subcommands
769            .iter()
770            .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
771    }
772
773    #[test]
774    fn test_swift_conditional_compilation_with_interpolated_brace_string() {
775        let plugin = CodeParserPlugin;
776        for (container_name, code) in [
777            (
778                "Config",
779                r#"
780class Config {
781    let tpl = "prefix \("}") suffix"
782#if DEBUG
783    func dump() { print(tpl) }
784#endif
785    func render() -> String { return tpl }
786}
787
788struct Tail { let q: Int }
789"#,
790            ),
791            (
792                "RawConfig",
793                r##"
794class RawConfig {
795    let tpl = #"prefix \#("{") suffix"#
796#if DEBUG
797    func dump() { print(tpl) }
798#endif
799    func render() -> String { return tpl }
800}
801"##,
802            ),
803            (
804                "MultilineConfig",
805                r#"
806class MultilineConfig {
807    let tpl = """
808    prefix \("}") suffix
809    """
810#if DEBUG
811    func dump() { print(tpl) }
812#endif
813    func render() -> String { return tpl }
814}
815"#,
816            ),
817            (
818                "ClosureConfig",
819                r#"
820class ClosureConfig {
821    let tpl = "prefix \(["}"].map { $0 }.joined()) suffix"
822#if DEBUG
823    func dump() { print(tpl) }
824#endif
825    func render() -> String { return tpl }
826}
827"#,
828            ),
829        ] {
830            let file_path = format!("{container_name}.swift");
831            let entities = plugin.extract_entities(code, &file_path);
832            let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
833            let container = entities
834                .iter()
835                .find(|e| e.name == container_name)
836                .unwrap_or_else(|| {
837                    panic!("Should recover {container_name}, got: {names:?}");
838                });
839            assert_eq!(container.entity_type, "class");
840            assert!(container.parent_id.is_none());
841
842            for member in ["tpl", "dump", "render"] {
843                let entity = entities
844                    .iter()
845                    .find(|e| e.name == member)
846                    .unwrap_or_else(|| {
847                        panic!("Should find {member} in {container_name}, got: {names:?}");
848                    });
849                assert_eq!(entity.parent_id.as_deref(), Some(container.id.as_str()));
850            }
851        }
852    }
853
854    #[test]
855    fn test_elixir_entity_extraction() {
856        let code = r#"
857defmodule MyApp.Accounts do
858  def create_user(attrs) do
859    %User{}
860    |> User.changeset(attrs)
861    |> Repo.insert()
862  end
863
864  defp validate(attrs) do
865    # private helper
866    :ok
867  end
868
869  defmacro is_admin(user) do
870    quote do
871      unquote(user).role == :admin
872    end
873  end
874
875  defguard is_positive(x) when is_integer(x) and x > 0
876end
877
878defprotocol Printable do
879  def to_string(data)
880end
881
882defimpl Printable, for: Integer do
883  def to_string(i), do: Integer.to_string(i)
884end
885"#;
886        let plugin = CodeParserPlugin;
887        let entities = plugin.extract_entities(code, "accounts.ex");
888        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
889        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
890        eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
891
892        assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
893        assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
894        assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
895        assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
896        assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
897
898        // Verify nesting: create_user should have MyApp.Accounts as parent
899        let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
900        assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
901    }
902
903    #[test]
904    #[cfg(feature = "lang-clojure")]
905    fn test_clojure_entity_extraction() {
906        let code = r#"
907(ns my.app.core
908  (:require [clojure.string :as str]))
909
910(def my-var 42)
911
912(def ^:private secret "hunter2")
913
914(defonce connection (atom nil))
915
916(defn greet
917  "Returns a greeting string."
918  [name]
919  (str "Hello, " name "!"))
920
921(defmacro unless [pred & body]
922  `(when (not ~pred) ~@body))
923
924(defprotocol Greeter
925  (greet! [this name]))
926
927(defrecord Person [name age])
928
929(defmulti area :shape)
930
931(defmethod area :circle [{:keys [radius]}]
932  (* Math/PI radius radius))
933
934(defmethod area :rectangle [{:keys [width height]}]
935  (* width height))
936"#;
937        let plugin = CodeParserPlugin;
938        let entities = plugin.extract_entities(code, "core.clj");
939        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
940        eprintln!(
941            "Clojure entities: {:?}",
942            entities
943                .iter()
944                .map(|e| (&e.name, &e.entity_type))
945                .collect::<Vec<_>>()
946        );
947
948        assert!(!names.contains(&"my.app.core"), "Should not extract ns form as entity, got: {:?}", names);
949        assert!(names.contains(&"my-var"), "Should find def, got: {:?}", names);
950        assert!(names.contains(&"secret"), "Should strip ^:private metadata from name, got: {:?}", names);
951        assert!(names.contains(&"connection"), "Should find defonce, got: {:?}", names);
952        assert!(names.contains(&"greet"), "Should find defn, got: {:?}", names);
953        assert!(names.contains(&"unless"), "Should find defmacro, got: {:?}", names);
954        assert!(names.contains(&"Greeter"), "Should find defprotocol, got: {:?}", names);
955        assert!(names.contains(&"Person"), "Should find defrecord, got: {:?}", names);
956        assert!(names.contains(&"area"), "Should find defmulti, got: {:?}", names);
957        // defmethods get dispatch-qualified names so two methods on the same multimethod are distinct
958        assert!(names.contains(&"area/:circle"), "Should find defmethod area :circle, got: {:?}", names);
959        assert!(names.contains(&"area/:rectangle"), "Should find defmethod area :rectangle, got: {:?}", names);
960        let ids: Vec<&str> = entities.iter().map(|e| e.id.as_str()).collect();
961        assert!(ids.iter().collect::<std::collections::HashSet<_>>().len() == ids.len(),
962            "All entity IDs must be unique, got: {:?}", ids);
963    }
964
965    #[test]
966    #[cfg(feature = "lang-clojure")]
967    fn test_clojure_defn_private() {
968        let code = r#"
969(ns my.app)
970
971(defn- private-helper [x]
972  (* x 2))
973"#;
974        let plugin = CodeParserPlugin;
975        let entities = plugin.extract_entities(code, "app.clj");
976        let entity = entities
977            .iter()
978            .find(|e| e.name == "private-helper")
979            .expect("Should extract defn- as a function entity");
980        assert_eq!(entity.entity_type, "function");
981    }
982
983    #[test]
984    #[cfg(feature = "lang-clojure")]
985    fn test_clojure_predicate_and_bang_functions() {
986        let code = r#"
987(ns my.app.validators)
988
989(defn empty? [coll]
990  (= 0 (count coll)))
991
992(defn reset! [state new-val]
993  (compare-and-set! state @state new-val))
994"#;
995        let plugin = CodeParserPlugin;
996        let entities = plugin.extract_entities(code, "validators.clj");
997        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
998        assert!(names.contains(&"empty?"), "Should extract predicate fn empty?, got: {:?}", names);
999        assert!(names.contains(&"reset!"), "Should extract bang fn reset!, got: {:?}", names);
1000        let empty_entity = entities.iter().find(|e| e.name == "empty?").unwrap();
1001        let reset_entity = entities.iter().find(|e| e.name == "reset!").unwrap();
1002        assert_eq!(empty_entity.entity_type, "function");
1003        assert_eq!(reset_entity.entity_type, "function");
1004    }
1005
1006    #[test]
1007    #[cfg(feature = "lang-clojure")]
1008    fn test_clojure_dynamic_vars_and_equality_fns() {
1009        let code = r#"
1010(ns my.app.core)
1011
1012(def *db* (atom nil))
1013
1014(defn not= [a b]
1015  (not (= a b)))
1016"#;
1017        let plugin = CodeParserPlugin;
1018        let entities = plugin.extract_entities(code, "core.clj");
1019        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1020        assert!(names.contains(&"*db*"), "Should extract dynamic var *db*, got: {:?}", names);
1021        assert!(names.contains(&"not="), "Should extract fn not=, got: {:?}", names);
1022        let db_entity = entities.iter().find(|e| e.name == "*db*").unwrap();
1023        let noteq_entity = entities.iter().find(|e| e.name == "not=").unwrap();
1024        assert_eq!(db_entity.entity_type, "var");
1025        assert_eq!(noteq_entity.entity_type, "function");
1026    }
1027
1028    #[test]
1029    #[cfg(feature = "lang-clojure")]
1030    fn test_clojure_deftype_definterface_defstruct() {
1031        let code = r#"
1032(ns my.app)
1033
1034(deftype MyType [field])
1035
1036(definterface IFoo
1037  (foo [this]))
1038
1039(defstruct point :x :y)
1040"#;
1041        let plugin = CodeParserPlugin;
1042        let entities = plugin.extract_entities(code, "app.clj");
1043        let by_name = |name: &str| entities.iter().find(|e| e.name == name);
1044
1045        assert!(by_name("MyType").is_some(), "Should extract deftype, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
1046        assert_eq!(by_name("MyType").unwrap().entity_type, "type");
1047
1048        assert!(by_name("IFoo").is_some(), "Should extract definterface, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
1049        assert_eq!(by_name("IFoo").unwrap().entity_type, "interface");
1050
1051        assert!(by_name("point").is_some(), "Should extract defstruct, got: {:?}", entities.iter().map(|e| &e.name).collect::<Vec<_>>());
1052        assert_eq!(by_name("point").unwrap().entity_type, "struct");
1053    }
1054
1055    #[test]
1056    #[cfg(feature = "lang-clojure")]
1057    fn test_clojure_cljc_extension() {
1058        let code = r#"
1059(ns my.app.shared)
1060
1061(defn platform-key [] :default)
1062
1063(def shared-value 99)
1064"#;
1065        let plugin = CodeParserPlugin;
1066        let entities = plugin.extract_entities(code, "shared.cljc");
1067        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1068        assert!(names.contains(&"platform-key"), "Should extract defn from .cljc, got: {:?}", names);
1069        assert!(names.contains(&"shared-value"), "Should extract def from .cljc, got: {:?}", names);
1070    }
1071
1072    #[test]
1073    #[cfg(feature = "lang-clojure")]
1074    fn test_clojure_defmethod_non_keyword_dispatch() {
1075        let code = r#"
1076(ns my.app)
1077
1078(defmulti process identity)
1079
1080(defmethod process nil [_] :nothing)
1081
1082(defmethod process "string" [s] s)
1083
1084(defmethod process 42 [n] n)
1085"#;
1086        let plugin = CodeParserPlugin;
1087        let entities = plugin.extract_entities(code, "app.clj");
1088        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1089        assert!(names.contains(&"process"), "Should extract defmulti, got: {:?}", names);
1090        assert!(names.contains(&"process/nil"), "Should extract defmethod with nil dispatch, got: {:?}", names);
1091        assert!(names.contains(&"process/\"string\""), "Should extract defmethod with string dispatch, got: {:?}", names);
1092        assert!(names.contains(&"process/42"), "Should extract defmethod with integer dispatch, got: {:?}", names);
1093        let ids: Vec<&str> = entities.iter().map(|e| e.id.as_str()).collect();
1094        assert!(
1095            ids.iter().collect::<std::collections::HashSet<_>>().len() == ids.len(),
1096            "All entity IDs must be unique, got: {:?}", ids
1097        );
1098    }
1099
1100    #[test]
1101    fn test_bash_entity_extraction() {
1102        let code = r#"#!/bin/bash
1103
1104greet() {
1105    echo "Hello, $1!"
1106}
1107
1108function deploy {
1109    echo "deploying..."
1110}
1111
1112# not a function
1113echo "main script"
1114"#;
1115        let plugin = CodeParserPlugin;
1116        let entities = plugin.extract_entities(code, "deploy.sh");
1117        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1118        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1119        eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1120
1121        assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
1122        assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
1123        assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
1124    }
1125
1126    #[test]
1127    fn test_typescript_entity_extraction() {
1128        // Existing language should still work
1129        let code = r#"
1130export function hello(): string {
1131    return "hello";
1132}
1133
1134export class Greeter {
1135    greet(name: string): string {
1136        return `Hello, ${name}!`;
1137    }
1138}
1139"#;
1140        let plugin = CodeParserPlugin;
1141        let entities = plugin.extract_entities(code, "test.ts");
1142        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1143        assert!(names.contains(&"hello"), "Should find hello function");
1144        assert!(names.contains(&"Greeter"), "Should find Greeter class");
1145    }
1146
1147    #[test]
1148    fn test_same_line_typescript_overload_ids_are_unique() {
1149        let code = "function f(a: number): void {}; function f(a: string): void {}\n";
1150        let plugin = CodeParserPlugin;
1151        let entities = plugin.extract_entities(code, "over.ts");
1152        let overloads: Vec<&SemanticEntity> = entities
1153            .iter()
1154            .filter(|entity| entity.name == "f" && entity.entity_type == "function")
1155            .collect();
1156        let ids: Vec<&str> = overloads.iter().map(|entity| entity.id.as_str()).collect();
1157
1158        assert_eq!(overloads.len(), 2, "expected both overloads, got: {entities:?}");
1159        assert_eq!(ids, vec!["over.ts::function::f@L1#1", "over.ts::function::f@L1#2"]);
1160    }
1161
1162    #[test]
1163    fn test_same_line_duplicate_parent_ids_are_propagated_to_children() {
1164        let code = "class C { m(){ return 1 } } class C { m(){ return 2 } }\n";
1165        let plugin = CodeParserPlugin;
1166        let entities = plugin.extract_entities(code, "c.ts");
1167        let classes: Vec<&SemanticEntity> = entities
1168            .iter()
1169            .filter(|entity| entity.name == "C" && entity.entity_type == "class")
1170            .collect();
1171        let methods: Vec<&SemanticEntity> = entities
1172            .iter()
1173            .filter(|entity| entity.name == "m" && entity.entity_type == "method")
1174            .collect();
1175
1176        assert_eq!(classes.len(), 2, "expected both classes, got: {entities:?}");
1177        assert_eq!(methods.len(), 2, "expected both methods, got: {entities:?}");
1178        assert_eq!(classes[0].id, "c.ts::class::C@L1#1");
1179        assert_eq!(classes[1].id, "c.ts::class::C@L1#2");
1180        assert_eq!(methods[0].parent_id.as_deref(), Some("c.ts::class::C@L1#1"));
1181        assert_eq!(methods[1].parent_id.as_deref(), Some("c.ts::class::C@L1#2"));
1182        assert_eq!(methods[0].id, "c.ts::class::C@L1#1::m");
1183        assert_eq!(methods[1].id, "c.ts::class::C@L1#2::m");
1184    }
1185
1186    #[test]
1187    fn test_module_typescript_entity_extraction() {
1188        let code = r#"
1189export function hello(): string {
1190    return "hello";
1191}
1192"#;
1193        let plugin = CodeParserPlugin;
1194        let entities = plugin.extract_entities(code, "test.mts");
1195        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1196
1197        assert!(names.contains(&"hello"), "Should find hello function");
1198    }
1199
1200    #[test]
1201    fn test_commonjs_typescript_entity_extraction() {
1202        let code = r#"
1203export class Greeter {
1204    greet(name: string): string {
1205        return `Hello, ${name}!`;
1206    }
1207}
1208"#;
1209        let plugin = CodeParserPlugin;
1210        let entities = plugin.extract_entities(code, "test.cts");
1211        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1212
1213        assert!(names.contains(&"Greeter"), "Should find Greeter class");
1214        assert!(names.contains(&"greet"), "Should find greet method");
1215    }
1216
1217    #[test]
1218    fn test_typescript_generator_function_entity_extraction() {
1219        let code = r#"
1220export async function* streamUsers(): AsyncGenerator<string> {
1221    yield "alice";
1222}
1223"#;
1224        let plugin = CodeParserPlugin;
1225        let entities = plugin.extract_entities(code, "stream.ts");
1226        let stream = entities.iter().find(|e| e.name == "streamUsers");
1227
1228        assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1229        assert_eq!(stream.unwrap().entity_type, "function");
1230    }
1231
1232    #[test]
1233    fn test_javascript_generator_function_entity_extraction() {
1234        let code = r#"
1235export function* ids() {
1236    yield 1;
1237    yield 2;
1238}
1239"#;
1240        let plugin = CodeParserPlugin;
1241        let entities = plugin.extract_entities(code, "ids.js");
1242        let ids = entities.iter().find(|e| e.name == "ids");
1243
1244        assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1245        assert_eq!(ids.unwrap().entity_type, "function");
1246    }
1247
1248    #[test]
1249    fn test_nested_functions_typescript() {
1250        let code = r#"
1251function outer() {
1252    function inner() {
1253        return 42;
1254    }
1255    return inner();
1256}
1257"#;
1258        let plugin = CodeParserPlugin;
1259        let entities = plugin.extract_entities(code, "nested.ts");
1260        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1261        eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1262
1263        assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
1264        assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
1265
1266        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1267        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1268    }
1269
1270    #[test]
1271    fn test_typescript_nested_anonymous_class_fields() {
1272        let code = r#"
1273class L1 {
1274  L2 = class {
1275    L3 = class {
1276      L4 = class {
1277        method() { return 1; }
1278      };
1279    };
1280  };
1281}
1282"#;
1283        let plugin = CodeParserPlugin;
1284        let entities = plugin.extract_entities(code, "a.ts");
1285        let find = |name: &str| {
1286            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1287                panic!(
1288                    "missing {name}; got: {:?}",
1289                    entities
1290                        .iter()
1291                        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1292                        .collect::<Vec<_>>()
1293                )
1294            })
1295        };
1296
1297        let l1 = find("L1");
1298        assert_eq!(l1.entity_type, "class");
1299        let l1_id = l1.id.clone();
1300
1301        let l2 = find("L2");
1302        assert_eq!(l2.entity_type, "field");
1303        assert_eq!(l2.parent_id.as_deref(), Some(l1_id.as_str()));
1304        let l2_id = l2.id.clone();
1305
1306        let l3 = find("L3");
1307        assert_eq!(l3.entity_type, "field");
1308        assert_eq!(l3.parent_id.as_deref(), Some(l2_id.as_str()));
1309        let l3_id = l3.id.clone();
1310
1311        let l4 = find("L4");
1312        assert_eq!(l4.entity_type, "field");
1313        assert_eq!(l4.parent_id.as_deref(), Some(l3_id.as_str()));
1314        let l4_id = l4.id.clone();
1315
1316        let method = find("method");
1317        assert_eq!(method.entity_type, "method");
1318        assert_eq!(method.parent_id.as_deref(), Some(l4_id.as_str()));
1319        assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
1320    }
1321
1322    #[test]
1323    fn test_nested_functions_python() {
1324        let code = "def outer():\n    def inner():\n        return 42\n    return inner()\n";
1325        let plugin = CodeParserPlugin;
1326        let entities = plugin.extract_entities(code, "nested.py");
1327        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1328
1329        assert!(names.contains(&"outer"), "got: {:?}", names);
1330        assert!(names.contains(&"inner"), "got: {:?}", names);
1331
1332        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1333        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1334    }
1335
1336    #[test]
1337    fn test_nested_functions_rust() {
1338        let code = "fn outer() {\n    fn inner() -> i32 {\n        42\n    }\n    inner();\n}\n";
1339        let plugin = CodeParserPlugin;
1340        let entities = plugin.extract_entities(code, "nested.rs");
1341        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1342
1343        assert!(names.contains(&"outer"), "got: {:?}", names);
1344        assert!(names.contains(&"inner"), "got: {:?}", names);
1345
1346        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
1347        assert!(inner.parent_id.is_some(), "inner should have parent_id");
1348    }
1349
1350    #[test]
1351    fn test_rust_impl_blocks_unique_names() {
1352        let code = r#"
1353trait Greeting {
1354    fn greet(&self) -> String;
1355}
1356
1357struct Person;
1358struct Robot;
1359struct Cat;
1360
1361impl Greeting for Person {
1362    fn greet(&self) -> String { "Hello".to_string() }
1363}
1364
1365impl Greeting for Robot {
1366    fn greet(&self) -> String { "Beep".to_string() }
1367}
1368
1369impl Greeting for Cat {
1370    fn greet(&self) -> String { "Meow".to_string() }
1371}
1372"#;
1373        let plugin = CodeParserPlugin;
1374        let entities = plugin.extract_entities(code, "impls.rs");
1375        let impl_entities: Vec<&_> = entities.iter()
1376            .filter(|e| e.entity_type == "impl")
1377            .collect();
1378        let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
1379
1380        assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
1381        assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
1382        assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
1383        assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
1384    }
1385
1386    #[test]
1387    fn test_nested_functions_go() {
1388        // Go doesn't have named nested functions, but has nested type/var declarations
1389        let code = "package main\n\nfunc outer() {\n    var x int = 42\n    _ = x\n}\n";
1390        let plugin = CodeParserPlugin;
1391        let entities = plugin.extract_entities(code, "nested.go");
1392        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1393
1394        assert!(names.contains(&"outer"), "got: {:?}", names);
1395    }
1396
1397    #[test]
1398    fn test_renamed_function_same_structural_hash() {
1399        let code_a = "def get_card():\n    return db.query('cards')\n";
1400        let code_b = "def get_card_1():\n    return db.query('cards')\n";
1401
1402        let plugin = CodeParserPlugin;
1403        let entities_a = plugin.extract_entities(code_a, "a.py");
1404        let entities_b = plugin.extract_entities(code_b, "b.py");
1405
1406        assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1407        assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1408        assert_eq!(entities_a[0].name, "get_card");
1409        assert_eq!(entities_b[0].name, "get_card_1");
1410
1411        // Structural hash should match since only the name differs
1412        assert_eq!(
1413            entities_a[0].structural_hash, entities_b[0].structural_hash,
1414            "Renamed function with identical body should have same structural_hash"
1415        );
1416
1417        // Content hash should differ (it includes the name)
1418        assert_ne!(
1419            entities_a[0].content_hash, entities_b[0].content_hash,
1420            "Content hash should differ since raw content includes the name"
1421        );
1422    }
1423
1424    #[test]
1425    fn test_swift_renamed_operator_same_structural_hash() {
1426        let plugin = CodeParserPlugin;
1427        let entities_a = plugin.extract_entities("prefix operator ~~~\n", "a.swift");
1428        let entities_b = plugin.extract_entities("prefix operator !!!\n", "b.swift");
1429
1430        assert_eq!(entities_a.len(), 1, "Should find one entity in a");
1431        assert_eq!(entities_b.len(), 1, "Should find one entity in b");
1432        assert_eq!(entities_a[0].name, "~~~");
1433        assert_eq!(entities_b[0].name, "!!!");
1434        assert_eq!(entities_a[0].entity_type, "operator");
1435        assert_eq!(entities_b[0].entity_type, "operator");
1436        assert_eq!(
1437            entities_a[0].structural_hash, entities_b[0].structural_hash,
1438            "Renamed operator with otherwise identical declaration should have same structural_hash"
1439        );
1440        assert_ne!(
1441            entities_a[0].content_hash, entities_b[0].content_hash,
1442            "Content hash should differ since raw content includes the operator token"
1443        );
1444    }
1445
1446    #[test]
1447    fn test_swift_synthesized_names_disambiguate_overloads() {
1448        let plugin = CodeParserPlugin;
1449        let code = r#"
1450struct Matrix {
1451    subscript(row: Int) -> Double {
1452        return Double(row)
1453    }
1454
1455    subscript(row: Int, column: Int) -> Double {
1456        return Double(row + column)
1457    }
1458}
1459
1460class Builder {
1461    init(value: Int) {}
1462    init(text: String) {}
1463}
1464"#;
1465
1466        let entities = plugin.extract_entities(code, "Overloads.swift");
1467
1468        let subscript_ids: Vec<&str> = entities
1469            .iter()
1470            .filter(|e| e.entity_type == "subscript")
1471            .map(|e| e.id.as_str())
1472            .collect();
1473        assert_eq!(subscript_ids.len(), 2);
1474        assert_ne!(subscript_ids[0], subscript_ids[1]);
1475        assert!(subscript_ids.iter().all(|id| id.contains("@L")));
1476
1477        let init_ids: Vec<&str> = entities
1478            .iter()
1479            .filter(|e| e.entity_type == "init")
1480            .map(|e| e.id.as_str())
1481            .collect();
1482        assert_eq!(init_ids.len(), 2);
1483        assert_ne!(init_ids[0], init_ids[1]);
1484        assert!(init_ids.iter().all(|id| id.contains("@L")));
1485    }
1486
1487    #[test]
1488    fn test_hcl_entity_extraction() {
1489        let code = r#"
1490region = "eu-west-1"
1491
1492variable "image_id" {
1493  type = string
1494}
1495
1496resource "aws_instance" "web" {
1497  ami = var.image_id
1498
1499  lifecycle {
1500    create_before_destroy = true
1501  }
1502}
1503"#;
1504        let plugin = CodeParserPlugin;
1505        let entities = plugin.extract_entities(code, "main.tf");
1506        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1507        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1508        eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1509
1510        assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
1511        assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
1512        assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
1513        assert!(
1514            names.contains(&"resource.aws_instance.web.lifecycle"),
1515            "Should find nested lifecycle block with qualified name, got: {:?}",
1516            names
1517        );
1518        assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
1519        assert!(
1520            !names.contains(&"create_before_destroy"),
1521            "Should skip nested attributes inside nested blocks, got: {:?}",
1522            names
1523        );
1524
1525        let lifecycle = entities
1526            .iter()
1527            .find(|e| e.name == "resource.aws_instance.web.lifecycle")
1528            .unwrap();
1529        assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
1530        assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
1531    }
1532
1533    #[test]
1534    fn test_kotlin_entity_extraction() {
1535        let code = r#"
1536class UserService {
1537    val name: String = ""
1538
1539    fun greet(): String {
1540        return "Hello, $name"
1541    }
1542
1543    companion object {
1544        fun create(): UserService = UserService()
1545    }
1546}
1547
1548interface Repository {
1549    fun findById(id: Int): Any?
1550}
1551
1552object AppConfig {
1553    val version = "1.0"
1554}
1555
1556fun topLevel(x: Int): Int = x * 2
1557"#;
1558        let plugin = CodeParserPlugin;
1559        let entities = plugin.extract_entities(code, "App.kt");
1560        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1561        eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1562        assert!(names.contains(&"UserService"), "got: {:?}", names);
1563        assert!(names.contains(&"greet"), "got: {:?}", names);
1564        assert!(names.contains(&"Repository"), "got: {:?}", names);
1565        assert!(names.contains(&"findById"), "got: {:?}", names);
1566        assert!(names.contains(&"AppConfig"), "got: {:?}", names);
1567        assert!(names.contains(&"topLevel"), "got: {:?}", names);
1568    }
1569
1570    #[test]
1571    fn test_xml_entity_extraction() {
1572        let code = r#"<?xml version="1.0" encoding="UTF-8"?>
1573<project>
1574    <groupId>com.example</groupId>
1575    <artifactId>my-app</artifactId>
1576    <dependencies>
1577        <dependency>
1578            <groupId>junit</groupId>
1579            <artifactId>junit</artifactId>
1580        </dependency>
1581    </dependencies>
1582    <build>
1583        <plugins>
1584            <plugin>
1585                <groupId>org.apache.maven</groupId>
1586            </plugin>
1587        </plugins>
1588    </build>
1589</project>
1590"#;
1591        let plugin = CodeParserPlugin;
1592        let entities = plugin.extract_entities(code, "pom.xml");
1593        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1594        eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1595        assert!(names.contains(&"project"), "got: {:?}", names);
1596        assert!(names.contains(&"dependencies"), "got: {:?}", names);
1597        assert!(names.contains(&"build"), "got: {:?}", names);
1598    }
1599
1600    #[test]
1601    fn test_arrow_callback_scope_boundary_typescript() {
1602        // Arrow function callbacks: locals are suppressed, but inner
1603        // class/function declarations are still extracted. Nested callbacks
1604        // also suppress their locals.
1605        let code = r#"
1606const activeQueues = [
1607  { queue: queues.fooQueue, processor: foo.process },
1608];
1609
1610activeQueues.forEach((handler: any) => {
1611  const queue = handler.queue;
1612  let retries = 0;
1613
1614  class QueueHandler {
1615    handle() { return queue; }
1616  }
1617
1618  function createHandler() {
1619    return new QueueHandler();
1620  }
1621
1622  queue.process((job) => {
1623    const orderId = job.data.orderId;
1624    return orderId;
1625  });
1626});
1627
1628function handleFailure(job: any, err: any) {
1629  console.error('failed', err);
1630}
1631"#;
1632        let plugin = CodeParserPlugin;
1633        let entities = plugin.extract_entities(code, "process.ts");
1634        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1635        let top_level: Vec<&str> = entities
1636            .iter()
1637            .filter(|e| e.parent_id.is_none())
1638            .map(|e| e.name.as_str())
1639            .collect();
1640
1641        // Top-level entities preserved
1642        assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
1643        assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
1644
1645        // Declarations inside callback extracted
1646        assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
1647        assert!(names.contains(&"handle"), "got: {:?}", names);
1648        assert!(names.contains(&"createHandler"), "got: {:?}", names);
1649
1650        // Locals inside callbacks suppressed
1651        assert!(!names.contains(&"queue"), "got: {:?}", names);
1652        assert!(!names.contains(&"retries"), "got: {:?}", names);
1653        assert!(!names.contains(&"orderId"), "got: {:?}", names);
1654    }
1655
1656    #[test]
1657    fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
1658        let code = r#"
1659function factory() {
1660  class Foo {
1661    method(): number {
1662      return 1;
1663    }
1664  }
1665
1666  function bar(): Foo {
1667    return new Foo();
1668  }
1669}
1670
1671factory();
1672"#;
1673        let plugin = CodeParserPlugin;
1674        let entities = plugin.extract_entities(code, "wrapped.ts");
1675        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1676        assert!(
1677            names.contains(&"factory"),
1678            "Should find top-level wrapper function, got: {:?}",
1679            names
1680        );
1681        assert!(
1682            names.contains(&"Foo"),
1683            "Should find class inside top-level wrapper, got: {:?}",
1684            names
1685        );
1686        assert!(
1687            names.contains(&"bar"),
1688            "Should find function inside top-level wrapper, got: {:?}",
1689            names
1690        );
1691    }
1692
1693    #[test]
1694    fn test_top_level_iife_still_extracts_typescript_entities() {
1695        let code = r#"
1696(() => {
1697  class Foo {
1698    method(): number {
1699      return 1;
1700    }
1701  }
1702
1703  function bar(): Foo {
1704    return new Foo();
1705  }
1706})();
1707"#;
1708        let plugin = CodeParserPlugin;
1709        let entities = plugin.extract_entities(code, "iife.ts");
1710        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1711        assert!(
1712            names.contains(&"Foo"),
1713            "Should find class inside top-level IIFE, got: {:?}",
1714            names
1715        );
1716        assert!(
1717            names.contains(&"bar"),
1718            "Should find function inside top-level IIFE, got: {:?}",
1719            names
1720        );
1721    }
1722
1723    #[test]
1724    fn test_function_locals_not_extracted_as_nested_entities_typescript() {
1725        let code = r#"
1726export default function foo() {
1727  const x = 1;
1728  return x;
1729}
1730"#;
1731        let plugin = CodeParserPlugin;
1732        let entities = plugin.extract_entities(code, "default-export.ts");
1733        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1734        assert!(
1735            names.contains(&"foo"),
1736            "Should find exported function, got: {:?}",
1737            names
1738        );
1739        assert!(
1740            !names.contains(&"x"),
1741            "Local inside function should not be extracted as an entity, got: {:?}",
1742            names
1743        );
1744    }
1745
1746    #[test]
1747    fn test_function_expression_scope_boundary_typescript() {
1748        // Function expressions: assigned to variables, or used as callback
1749        // arguments. Locals are suppressed in all cases.
1750        let code = r#"
1751const foo = function namedExpr(x: number) {
1752  const inner = x + 1;
1753  return inner;
1754};
1755
1756const bar = function(y: number) {
1757  const local = y * 2;
1758  return local;
1759};
1760
1761const items = [1, 2, 3];
1762
1763items.forEach(function process(item) {
1764  const doubled = item * 2;
1765  console.log(doubled);
1766});
1767"#;
1768        let plugin = CodeParserPlugin;
1769        let entities = plugin.extract_entities(code, "funexpr.ts");
1770        let top_level: Vec<&str> = entities
1771            .iter()
1772            .filter(|e| e.parent_id.is_none())
1773            .map(|e| e.name.as_str())
1774            .collect();
1775        let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
1776        let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1777
1778        // Top-level declarations preserved, and const-assigned function
1779        // expressions are promoted from variable to function.
1780        assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
1781        assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
1782        assert!(top_level.contains(&"items"), "got: {:?}", top_level);
1783        assert_eq!(find("foo").entity_type, "function");
1784        assert_eq!(find("bar").entity_type, "function");
1785        assert_eq!(find("items").entity_type, "variable");
1786
1787        // Locals inside function expressions suppressed
1788        assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
1789        assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
1790        assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
1791
1792        // Named function expression used as callback argument not extracted
1793        assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
1794    }
1795
1796    #[test]
1797    fn test_variable_assigned_arrow_extracts_inner_entities() {
1798        // Arrow function assigned to a variable: inner class/function
1799        // declarations should be extracted, locals should be suppressed.
1800        let code = r#"
1801const handler = () => {
1802  class Inner {
1803    run() { return 1; }
1804  }
1805
1806  function make() {
1807    return new Inner();
1808  }
1809
1810  const local = 42;
1811};
1812"#;
1813        let plugin = CodeParserPlugin;
1814        let entities = plugin.extract_entities(code, "assigned.ts");
1815        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1816        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1817
1818        assert_eq!(handler.entity_type, "function");
1819        assert!(names.contains(&"handler"), "got: {:?}", names);
1820        assert!(names.contains(&"Inner"), "got: {:?}", names);
1821        assert!(names.contains(&"run"), "got: {:?}", names);
1822        assert!(names.contains(&"make"), "got: {:?}", names);
1823        assert!(!names.contains(&"local"), "got: {:?}", names);
1824    }
1825
1826    #[test]
1827    fn test_variable_assigned_function_expression_extracts_inner_entities() {
1828        // Function expression assigned to a variable: same behavior.
1829        let code = r#"
1830const handler = function() {
1831  class Inner {}
1832  function make() { return new Inner(); }
1833  const local = 42;
1834};
1835"#;
1836        let plugin = CodeParserPlugin;
1837        let entities = plugin.extract_entities(code, "funexpr-inner.ts");
1838        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1839        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1840
1841        assert_eq!(handler.entity_type, "function");
1842        assert!(names.contains(&"handler"), "got: {:?}", names);
1843        assert!(names.contains(&"Inner"), "got: {:?}", names);
1844        assert!(names.contains(&"make"), "got: {:?}", names);
1845        assert!(!names.contains(&"local"), "got: {:?}", names);
1846    }
1847
1848    #[test]
1849    fn test_let_assigned_arrow_stays_variable_typescript() {
1850        let code = r#"
1851let handler = () => {
1852  return 42;
1853};
1854"#;
1855        let plugin = CodeParserPlugin;
1856        let entities = plugin.extract_entities(code, "let-assigned.ts");
1857        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1858
1859        assert_eq!(handler.entity_type, "variable");
1860    }
1861
1862    #[test]
1863    fn test_const_assigned_arrow_promoted_to_function_javascript() {
1864        let code = r#"
1865const handler = () => {
1866  return 42;
1867};
1868"#;
1869        let plugin = CodeParserPlugin;
1870        let entities = plugin.extract_entities(code, "handler.js");
1871        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1872
1873        assert_eq!(handler.entity_type, "function");
1874    }
1875
1876    #[test]
1877    fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
1878        let code = r#"
1879const value = 1, handler = () => value;
1880const first = () => 1, second = 2;
1881"#;
1882        let plugin = CodeParserPlugin;
1883        let entities = plugin.extract_entities(code, "sample.ts");
1884        let find = |name: &str| {
1885            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1886                panic!(
1887                    "missing {name}; got: {:?}",
1888                    entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
1889                )
1890            })
1891        };
1892
1893        assert_eq!(find("value").entity_type, "variable");
1894        assert_eq!(find("handler").entity_type, "function");
1895        assert_eq!(find("first").entity_type, "function");
1896        assert_eq!(find("second").entity_type, "variable");
1897    }
1898
1899    #[test]
1900    fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
1901        let code = r#"
1902function wrapper() {
1903  const holder = class {
1904    run() { return 1; }
1905  }, handler = () => {
1906    class Inner {
1907      go() { return 2; }
1908    }
1909  }, value = 1;
1910}
1911"#;
1912        let plugin = CodeParserPlugin;
1913        let entities = plugin.extract_entities(code, "sample.ts");
1914        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1915        let find = |name: &str| {
1916            entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1917                panic!(
1918                    "missing {name}; got: {:?}",
1919                    entities
1920                        .iter()
1921                        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1922                        .collect::<Vec<_>>()
1923                )
1924            })
1925        };
1926
1927        assert_eq!(find("wrapper").entity_type, "function");
1928        assert_eq!(find("handler").entity_type, "function");
1929        assert!(names.contains(&"run"), "got: {:?}", names);
1930        assert!(names.contains(&"Inner"), "got: {:?}", names);
1931        assert!(names.contains(&"go"), "got: {:?}", names);
1932        assert!(!names.contains(&"holder"), "got: {:?}", names);
1933        assert!(!names.contains(&"value"), "got: {:?}", names);
1934    }
1935
1936    #[test]
1937    fn test_go_var_declaration() {
1938        let code = r#"package featuremgmt
1939
1940type FeatureFlag struct {
1941	Name        string
1942	Description string
1943	Stage       string
1944}
1945
1946var standardFeatureFlags = []FeatureFlag{
1947	{
1948		Name:        "panelTitleSearch",
1949		Description: "Search for dashboards using panel title",
1950		Stage:       "PublicPreview",
1951	},
1952}
1953
1954func GetFlags() []FeatureFlag {
1955	return standardFeatureFlags
1956}
1957"#;
1958        let plugin = CodeParserPlugin;
1959        let entities = plugin.extract_entities(code, "flags.go");
1960        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1961        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1962        eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1963
1964        assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
1965        assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
1966        assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
1967    }
1968
1969    #[test]
1970    fn test_go_grouped_var_declaration() {
1971        let code = r#"package test
1972
1973var (
1974	simple = 42
1975	flags = []string{"a", "b"}
1976)
1977
1978const (
1979	x = 1
1980	y = 2
1981)
1982
1983func main() {}
1984"#;
1985        let plugin = CodeParserPlugin;
1986        let entities = plugin.extract_entities(code, "test.go");
1987        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1988        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1989        eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1990
1991        assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1992        assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1993        assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1994    }
1995
1996    #[test]
1997    fn test_dart_entity_extraction() {
1998        let code = r#"
1999import 'dart:math';
2000
2001class Calculator {
2002  final String name;
2003
2004  Calculator(this.name);
2005
2006  Calculator.withDefault() : name = 'default';
2007
2008  factory Calculator.create(String name) {
2009    return Calculator(name);
2010  }
2011
2012  int add(int a, int b) {
2013    return a + b;
2014  }
2015
2016  int get doubleAdd => add(1, 1) * 2;
2017
2018  set label(String value) {
2019    // no-op
2020  }
2021
2022  int operator +(Calculator other) {
2023    return 0;
2024  }
2025}
2026
2027mixin Loggable {
2028  void log(String message) {
2029    print(message);
2030  }
2031}
2032
2033extension StringExt on String {
2034  bool get isBlank => trim().isEmpty;
2035}
2036
2037enum Status {
2038  active,
2039  inactive;
2040
2041  String display() => name.toUpperCase();
2042}
2043
2044typedef Callback = void Function(int);
2045
2046int add(int a, int b) {
2047  return a + b;
2048}
2049
2050extension type Wrapper(int value) implements int {}
2051"#;
2052        let plugin = CodeParserPlugin;
2053        let entities = plugin.extract_entities(code, "calculator.dart");
2054        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2055        eprintln!(
2056            "Dart entities: {:?}",
2057            entities
2058                .iter()
2059                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2060                .collect::<Vec<_>>()
2061        );
2062
2063        // Top-level declarations
2064        assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
2065        assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
2066        assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
2067        assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
2068        assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
2069        assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
2070        assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
2071
2072        // Class members with correct types
2073        let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
2074        assert!(add_method.is_some(), "Should find add method inside Calculator");
2075        assert_eq!(add_method.unwrap().entity_type, "method");
2076
2077        // Named constructor gets distinct name from unnamed constructor
2078        let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
2079        assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
2080        let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
2081        assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
2082        assert_eq!(named_ctor.unwrap().entity_type, "constructor");
2083        assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
2084
2085        // Factory constructor
2086        let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
2087        assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
2088        assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
2089
2090        // Getter, setter, operator
2091        let getter = entities.iter().find(|e| e.name == "doubleAdd");
2092        assert!(getter.is_some(), "Should find getter doubleAdd");
2093        assert_eq!(getter.unwrap().entity_type, "getter");
2094
2095        let setter = entities.iter().find(|e| e.name == "label");
2096        assert!(setter.is_some(), "Should find setter label");
2097        assert_eq!(setter.unwrap().entity_type, "setter");
2098
2099        let operator = entities.iter().find(|e| e.name == "operator +");
2100        assert!(operator.is_some(), "Should find operator +");
2101        assert_eq!(operator.unwrap().entity_type, "method");
2102
2103        // Mixin members have parent
2104        let log_method = entities.iter().find(|e| e.name == "log");
2105        assert!(log_method.is_some(), "Should find log in Loggable");
2106        assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
2107
2108        // Entity type mapping
2109        let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
2110        assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
2111
2112        let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
2113        assert_eq!(loggable.entity_type, "mixin");
2114
2115        let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
2116        assert_eq!(ext.entity_type, "extension");
2117
2118        let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
2119        assert_eq!(wrapper.entity_type, "extension");
2120    }
2121
2122    #[test]
2123    #[cfg(feature = "lang-sql")]
2124    fn test_sql_entity_extraction() {
2125        let code = r#"
2126CREATE TABLE users (id INT PRIMARY KEY, name TEXT);
2127CREATE VIEW active_users AS SELECT * FROM users WHERE active;
2128CREATE FUNCTION add(a INT, b INT) RETURNS INT AS $$ BEGIN RETURN a + b; END; $$ LANGUAGE plpgsql;
2129CREATE INDEX idx_name ON users(name);
2130CREATE TYPE mood AS ENUM ('sad', 'happy');
2131CREATE SCHEMA myapp;
2132CREATE MATERIALIZED VIEW mv AS SELECT 1;
2133CREATE TABLE billing.invoices (id INT);
2134"#;
2135        let plugin = CodeParserPlugin;
2136        let entities = plugin.extract_entities(code, "schema.sql");
2137        let by_name = |n: &str| entities.iter().find(|e| e.name == n);
2138
2139        // object_reference names (incl. schema-qualified)
2140        assert_eq!(by_name("users").map(|e| e.entity_type.as_str()), Some("table"));
2141        assert_eq!(by_name("active_users").map(|e| e.entity_type.as_str()), Some("view"));
2142        assert_eq!(by_name("add").map(|e| e.entity_type.as_str()), Some("function"));
2143        assert_eq!(by_name("mood").map(|e| e.entity_type.as_str()), Some("type"));
2144        assert_eq!(by_name("mv").map(|e| e.entity_type.as_str()), Some("view"));
2145        assert_eq!(
2146            by_name("billing.invoices").map(|e| e.entity_type.as_str()),
2147            Some("table"),
2148            "schema-qualified table name should be preserved"
2149        );
2150
2151        // CREATE INDEX / SCHEMA name a bare identifier, not the ON-table
2152        assert_eq!(
2153            by_name("idx_name").map(|e| e.entity_type.as_str()),
2154            Some("index"),
2155            "index should be named idx_name, not the table it indexes"
2156        );
2157        assert_eq!(by_name("myapp").map(|e| e.entity_type.as_str()), Some("schema"));
2158    }
2159
2160    #[test]
2161    fn test_dart_top_level_function_includes_body() {
2162        let code = r#"
2163int add(int a, int b) {
2164  return a + b;
2165}
2166
2167String greet(String name) => 'Hello, $name!';
2168"#;
2169        let plugin = CodeParserPlugin;
2170        let entities = plugin.extract_entities(code, "funcs.dart");
2171        eprintln!(
2172            "Dart top-level: {:?}",
2173            entities
2174                .iter()
2175                .map(|e| (&e.name, &e.entity_type, &e.content))
2176                .collect::<Vec<_>>()
2177        );
2178
2179        let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
2180        assert!(
2181            add_fn.content.contains("return a + b"),
2182            "Top-level function content should include the body, got: {:?}",
2183            add_fn.content
2184        );
2185
2186        let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
2187        assert!(
2188            greet_fn.content.contains("Hello"),
2189            "Expression body should be included, got: {:?}",
2190            greet_fn.content
2191        );
2192
2193        // Body changes should produce different content_hash
2194        let code_v2 = r#"
2195int add(int a, int b) {
2196  return a * b;
2197}
2198
2199String greet(String name) => 'Hello, $name!';
2200"#;
2201        let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
2202        let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
2203        assert_ne!(
2204            add_fn.content_hash, add_v2.content_hash,
2205            "Body change should produce different content_hash"
2206        );
2207
2208        // Unchanged function should keep the same hash
2209        let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
2210        assert_eq!(
2211            greet_fn.content_hash, greet_v2.content_hash,
2212            "Unchanged function should keep the same content_hash"
2213        );
2214    }
2215
2216    #[test]
2217    fn test_dart_renamed_named_constructor_same_structural_hash() {
2218        let code_a = r#"
2219class Foo {
2220  Foo.fromJson(Map<String, dynamic> json) {
2221    print(json);
2222  }
2223}
2224"#;
2225        let code_b = r#"
2226class Foo {
2227  Foo.fromMap(Map<String, dynamic> json) {
2228    print(json);
2229  }
2230}
2231"#;
2232        let plugin = CodeParserPlugin;
2233        let entities_a = plugin.extract_entities(code_a, "a.dart");
2234        let entities_b = plugin.extract_entities(code_b, "b.dart");
2235
2236        let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
2237        let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
2238
2239        assert_eq!(
2240            ctor_a.structural_hash, ctor_b.structural_hash,
2241            "Renamed named constructor with identical body should have same structural_hash"
2242        );
2243        assert_ne!(
2244            ctor_a.content_hash, ctor_b.content_hash,
2245            "Content hash should differ since raw content includes the name"
2246        );
2247    }
2248
2249    #[test]
2250    fn test_dart_top_level_getter_setter() {
2251        let code = r#"
2252int _value = 0;
2253
2254int get currentValue {
2255  return _value;
2256}
2257
2258set currentValue(int v) {
2259  _value = v;
2260}
2261"#;
2262        let plugin = CodeParserPlugin;
2263        let entities = plugin.extract_entities(code, "accessors.dart");
2264        eprintln!(
2265            "Dart top-level accessors: {:?}",
2266            entities
2267                .iter()
2268                .map(|e| (&e.name, &e.entity_type, &e.content))
2269                .collect::<Vec<_>>()
2270        );
2271
2272        let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
2273        assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
2274            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2275        assert!(
2276            getter.unwrap().content.contains("return _value"),
2277            "Top-level getter content should include the body"
2278        );
2279        assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
2280
2281        // tree-sitter-dart 0.2.0 parses top-level setters as function_signature
2282        // (treating `set` as a type_identifier). setter_signature is only
2283        // produced inside class_member → method_signature.
2284        let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
2285        assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
2286            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2287        assert!(
2288            setter.unwrap().content.contains("_value = v"),
2289            "Top-level setter content should include the body"
2290        );
2291    }
2292
2293    #[test]
2294    fn test_dart_field_entity_type() {
2295        let code = r#"
2296class Config {
2297  final String name;
2298  static const int maxRetries = 3;
2299}
2300"#;
2301        let plugin = CodeParserPlugin;
2302        let entities = plugin.extract_entities(code, "config.dart");
2303        eprintln!(
2304            "Dart fields: {:?}",
2305            entities
2306                .iter()
2307                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2308                .collect::<Vec<_>>()
2309        );
2310
2311        let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
2312        assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
2313            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2314        assert_eq!(name_field.unwrap().entity_type, "field");
2315
2316        let max_retries = entities.iter().find(|e| e.name == "maxRetries");
2317        assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
2318            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2319        assert_eq!(max_retries.unwrap().entity_type, "field");
2320    }
2321
2322    #[test]
2323    fn test_dart_identifier_list_fields() {
2324        // identifier_list produces bare identifier children (no "name" field),
2325        // unlike initialized_identifier_list which wraps each in an
2326        // initialized_identifier node with a "name" field.
2327        let code = r#"
2328abstract class Shape {
2329  abstract double x, y;
2330  abstract String label;
2331}
2332"#;
2333        let plugin = CodeParserPlugin;
2334        let entities = plugin.extract_entities(code, "shape.dart");
2335        eprintln!(
2336            "Dart identifier_list fields: {:?}",
2337            entities
2338                .iter()
2339                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
2340                .collect::<Vec<_>>()
2341        );
2342
2343        let x_field = entities.iter().find(|e| e.name == "x");
2344        assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
2345            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2346        assert_eq!(x_field.unwrap().entity_type, "field");
2347        assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
2348
2349        let label_field = entities.iter().find(|e| e.name == "label");
2350        assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
2351            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2352        assert_eq!(label_field.unwrap().entity_type, "field");
2353    }
2354
2355    #[test]
2356    fn test_ocaml_entity_extraction() {
2357        let code = r#"
2358type color = Red | Green | Blue
2359
2360type point = {
2361  x : float;
2362  y : float;
2363}
2364
2365exception Not_found of string
2366
2367let greet name =
2368  Printf.printf "Hello, %s!\n" name
2369
2370let add a b = a + b
2371
2372let version = "1.0"
2373
2374let color_to_string = function
2375  | Red -> "red"
2376  | Blue -> "blue"
2377
2378let inc = fun x -> x + 1
2379
2380module MyModule = struct
2381  let helper x = x * 2
2382end
2383
2384module type Printable = sig
2385  val to_string : 'a -> string
2386end
2387
2388external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
2389
2390class point_class x_init = object
2391  val mutable x = x_init
2392  method get_x = x
2393end
2394
2395class type measurable = object
2396  method measure : float
2397end
2398"#;
2399        let plugin = CodeParserPlugin;
2400        let entities = plugin.extract_entities(code, "example.ml");
2401        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2402        eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2403
2404        let find = |name: &str| entities.iter().find(|e| e.name == name)
2405            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2406
2407        assert_eq!(find("color").entity_type, "type");
2408        assert_eq!(find("point").entity_type, "type");
2409        assert_eq!(find("Not_found").entity_type, "exception");
2410        assert_eq!(find("greet").entity_type, "function");
2411        assert_eq!(find("add").entity_type, "function");
2412        assert_eq!(find("version").entity_type, "value");
2413        assert_eq!(find("color_to_string").entity_type, "function");
2414        assert_eq!(find("inc").entity_type, "function");
2415        assert_eq!(find("MyModule").entity_type, "module");
2416        assert_eq!(find("Printable").entity_type, "module_type");
2417        assert_eq!(find("caml_input").entity_type, "external");
2418        assert_eq!(find("point_class").entity_type, "class");
2419        assert_eq!(find("measurable").entity_type, "class_type");
2420    }
2421
2422    #[test]
2423    fn test_ocaml_nested_module_entities() {
2424        let code = r#"
2425module Outer = struct
2426  let x = 42
2427
2428  module Inner = struct
2429    let y = 0
2430  end
2431end
2432"#;
2433        let plugin = CodeParserPlugin;
2434        let entities = plugin.extract_entities(code, "nested.ml");
2435        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2436        eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2437
2438        let find = |name: &str| entities.iter().find(|e| e.name == name)
2439            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2440
2441        let outer = find("Outer");
2442        let x = find("x");
2443        let inner = find("Inner");
2444        let y = find("y");
2445
2446        assert_eq!(outer.entity_type, "module");
2447        assert_eq!(x.entity_type, "value");
2448        assert_eq!(inner.entity_type, "module");
2449        assert_eq!(y.entity_type, "value");
2450
2451        assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
2452        assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
2453        assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
2454    }
2455
2456    #[test]
2457    fn test_ocaml_interface_entity_extraction() {
2458        let code = r#"
2459type t
2460
2461val create : string -> t
2462val to_string : t -> string
2463
2464exception Invalid_input of string
2465
2466module type Serializable = sig
2467  val serialize : t -> string
2468end
2469"#;
2470        let plugin = CodeParserPlugin;
2471        let entities = plugin.extract_entities(code, "example.mli");
2472        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2473        eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2474
2475        let find = |name: &str| entities.iter().find(|e| e.name == name)
2476            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2477
2478        assert_eq!(find("t").entity_type, "type");
2479        assert_eq!(find("create").entity_type, "val");
2480        assert_eq!(find("to_string").entity_type, "val");
2481        assert_eq!(find("Invalid_input").entity_type, "exception");
2482        assert_eq!(find("Serializable").entity_type, "module_type");
2483    }
2484
2485    #[test]
2486    fn test_ocaml_mutual_recursion_let() {
2487        let code = r#"
2488let rec even n = (n = 0) || odd (n - 1)
2489and odd n = (n <> 0) && even (n - 1)
2490
2491let rec ping x = pong (x - 1)
2492and pong x = if x <= 0 then 0 else ping (x - 1)
2493"#;
2494        let plugin = CodeParserPlugin;
2495        let entities = plugin.extract_entities(code, "mutual.ml");
2496        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2497        eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2498
2499        let find = |name: &str| entities.iter().find(|e| e.name == name)
2500            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2501
2502        assert_eq!(find("even").entity_type, "function");
2503        assert_eq!(find("odd").entity_type, "function");
2504        assert_eq!(find("ping").entity_type, "function");
2505        assert_eq!(find("pong").entity_type, "function");
2506    }
2507
2508    #[test]
2509    fn test_ocaml_mutual_recursion_module() {
2510        let code = r#"
2511module rec A : sig val x : int end = struct
2512  let x = B.y + 1
2513end
2514and B : sig val y : int end = struct
2515  let y = 0
2516end
2517"#;
2518        let plugin = CodeParserPlugin;
2519        let entities = plugin.extract_entities(code, "mutual_mod.ml");
2520        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2521        eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
2522
2523        let find = |name: &str| entities.iter().find(|e| e.name == name)
2524            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2525
2526        let a = find("A");
2527        let b = find("B");
2528        assert_eq!(a.entity_type, "module");
2529        assert_eq!(b.entity_type, "module");
2530
2531        let x = find("x");
2532        let y = find("y");
2533        assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
2534        assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
2535    }
2536
2537    #[test]
2538    fn test_ocaml_destructured_let() {
2539        let code = r#"
2540let (a, b) = (1, 2)
2541
2542let { x; y } = point
2543
2544let simple = 42
2545"#;
2546        let plugin = CodeParserPlugin;
2547        let entities = plugin.extract_entities(code, "destruct.ml");
2548        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2549        eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2550
2551        let find = |name: &str| entities.iter().find(|e| e.name == name)
2552            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2553
2554        assert_eq!(find("a").entity_type, "value");
2555        assert_eq!(find("b").entity_type, "value");
2556        assert_eq!(find("x").entity_type, "value");
2557        assert_eq!(find("y").entity_type, "value");
2558        assert_eq!(find("simple").entity_type, "value");
2559    }
2560
2561    #[test]
2562    fn test_ocaml_mutual_recursion_class() {
2563        let code = r#"
2564class foo = object
2565  method x = 1
2566end
2567and bar = object
2568  method y = 2
2569end
2570"#;
2571        let plugin = CodeParserPlugin;
2572        let entities = plugin.extract_entities(code, "classes.ml");
2573        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2574        eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2575
2576        let find = |name: &str| entities.iter().find(|e| e.name == name)
2577            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2578
2579        assert_eq!(find("foo").entity_type, "class");
2580        assert_eq!(find("bar").entity_type, "class");
2581    }
2582
2583    #[test]
2584    fn test_perl_entity_extraction() {
2585        let code = r#"package Foo::Bar;
2586
2587use strict;
2588use warnings;
2589
2590sub hello {
2591    my ($self, $name) = @_;
2592    print "Hello, $name!\n";
2593}
2594
2595sub _private_helper {
2596    return 42;
2597}
2598
25991;
2600"#;
2601        let plugin = CodeParserPlugin;
2602        let entities = plugin.extract_entities(code, "Foo/Bar.pm");
2603        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2604
2605        assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
2606        assert!(names.contains(&"hello"), "got: {:?}", names);
2607        assert!(names.contains(&"_private_helper"), "got: {:?}", names);
2608
2609        let find = |name: &str| entities.iter().find(|e| e.name == name)
2610            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2611
2612        assert_eq!(find("Foo::Bar").entity_type, "package");
2613        assert_eq!(find("hello").entity_type, "function");
2614        assert_eq!(find("_private_helper").entity_type, "function");
2615    }
2616
2617    #[test]
2618    fn test_fortran_entity_extraction() {
2619        let code = r#"module math_utils
2620  implicit none
2621contains
2622  function add(a, b) result(c)
2623    integer, intent(in) :: a, b
2624    integer :: c
2625    c = a + b
2626  end function add
2627
2628  subroutine greet()
2629    print *, "hello"
2630  end subroutine greet
2631end module math_utils
2632
2633program main
2634  implicit none
2635  print *, "hello"
2636end program main
2637"#;
2638        let plugin = CodeParserPlugin;
2639        let entities = plugin.extract_entities(code, "test.f90");
2640        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2641
2642        assert!(names.contains(&"math_utils"), "got: {:?}", names);
2643        assert!(names.contains(&"add"), "got: {:?}", names);
2644        assert!(names.contains(&"greet"), "got: {:?}", names);
2645        assert!(names.contains(&"main"), "got: {:?}", names);
2646
2647        let find = |name: &str| entities.iter().find(|e| e.name == name)
2648            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
2649
2650        assert_eq!(find("math_utils").entity_type, "module");
2651        assert_eq!(find("add").entity_type, "function");
2652        assert_eq!(find("greet").entity_type, "subroutine");
2653        assert_eq!(find("main").entity_type, "program");
2654
2655        // Nested entities have parent
2656        assert!(find("add").parent_id.is_some());
2657        assert!(find("greet").parent_id.is_some());
2658    }
2659
2660    #[test]
2661    fn test_scala_entity_extraction() {
2662        let code = r#"
2663package com.example
2664
2665import scala.collection.mutable
2666
2667class UserService(val name: String) {
2668  def getUsers(): List[User] = db.findAll()
2669
2670  def createUser(user: User): Unit = db.save(user)
2671
2672  private def validate(user: User): Boolean = true
2673}
2674
2675object UserService {
2676  def apply(name: String): UserService = new UserService(name)
2677
2678  val DefaultName: String = "default"
2679}
2680
2681trait Repository[T] {
2682  def findById(id: String): Option[T]
2683  def findAll(): List[T]
2684}
2685
2686case class User(id: String, name: String)
2687
2688type UserId = String
2689"#;
2690        let plugin = CodeParserPlugin;
2691        let entities = plugin.extract_entities(code, "UserService.scala");
2692        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2693        eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2694
2695        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
2696        assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
2697        assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
2698        assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
2699
2700        // Methods should be nested under class
2701        let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
2702        assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
2703    }
2704
2705    #[test]
2706    fn test_scala3_entity_extraction() {
2707        let code = r#"
2708package com.example
2709
2710enum Color:
2711  case Red, Green, Blue
2712
2713enum Planet(mass: Double, radius: Double):
2714  case Mercury extends Planet(3.303e+23, 2.4397e6)
2715  case Venus   extends Planet(4.869e+24, 6.0518e6)
2716
2717object Main:
2718  def main(args: Array[String]): Unit =
2719    println("Hello, World!")
2720
2721trait Greeter:
2722  def greet(name: String): String
2723
2724given Greeter with
2725  def greet(name: String): String = s"Hello, $name!"
2726
2727extension (s: String)
2728  def shout: String = s.toUpperCase + "!"
2729
2730type Predicate[A] = A => Boolean
2731"#;
2732        let plugin = CodeParserPlugin;
2733        let entities = plugin.extract_entities(code, "Main.scala");
2734        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2735        eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
2736
2737        assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
2738        assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
2739        assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
2740        assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
2741        assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
2742    }
2743
2744    #[test]
2745    fn test_zig_entity_extraction() {
2746        let code = r#"
2747const std = @import("std");
2748
2749pub const Point = struct {
2750    x: i32,
2751    y: i32,
2752};
2753
2754pub const Color = enum {
2755    red,
2756    green,
2757    blue,
2758};
2759
2760const Person = struct {
2761    name: []const u8,
2762    age: u32,
2763};
2764
2765pub fn greet(name: []const u8) void {
2766    std.debug.print("Hello, {s}!\n", .{name});
2767}
2768
2769fn add(a: i32, b: i32) i32 {
2770    return a + b;
2771}
2772
2773pub fn main() !void {
2774    greet("world");
2775}
2776
2777test "basic addition" {
2778    const result = add(2, 3);
2779    _ = result;
2780}
2781"#;
2782        let plugin = CodeParserPlugin;
2783        let entities = plugin.extract_entities(code, "main.zig");
2784        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2785        let types: std::collections::HashMap<&str, &str> = entities
2786            .iter()
2787            .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2788            .collect();
2789
2790        assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
2791        assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
2792        assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
2793        assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
2794        assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
2795        assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
2796
2797        assert_eq!(types["greet"], "function");
2798        assert_eq!(types["add"], "function");
2799        assert_eq!(types["Point"], "struct");
2800        assert_eq!(types["Color"], "enum");
2801        assert_eq!(types["Person"], "struct");
2802    }
2803
2804    #[test]
2805    #[cfg(feature = "lang-edn")]
2806    fn test_edn_deps_edn_map_entries() {
2807        let code = r#"{:deps {org.clojure/clojure {:mvn/version "1.11.0"}}
2808 :paths ["src" "resources"]
2809 :aliases {:dev {:extra-deps {cider/cider-nrepl {:mvn/version "0.28.5"}}}}}"#;
2810        let plugin = CodeParserPlugin;
2811        let entities = plugin.extract_entities(code, "deps.edn");
2812        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2813        let types: std::collections::HashMap<&str, &str> = entities
2814            .iter()
2815            .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2816            .collect();
2817
2818        assert!(names.contains(&":deps"), "Should find :deps, got: {:?}", names);
2819        assert!(names.contains(&":paths"), "Should find :paths, got: {:?}", names);
2820        assert!(names.contains(&":aliases"), "Should find :aliases, got: {:?}", names);
2821        assert_eq!(names.len(), 3, "Should have exactly 3 entries, got: {:?}", names);
2822        assert_eq!(types[":deps"], "entry");
2823        assert_eq!(types[":paths"], "entry");
2824        assert_eq!(types[":aliases"], "entry");
2825    }
2826
2827    #[test]
2828    #[cfg(feature = "lang-edn")]
2829    fn test_edn_nested_map_values_not_extracted() {
2830        // Inner map entries (inside :aliases) must not leak as top-level entities.
2831        let code = r#"{:a {:b 1 :c 2} :d 3}"#;
2832        let plugin = CodeParserPlugin;
2833        let entities = plugin.extract_entities(code, "config.edn");
2834        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2835
2836        assert!(names.contains(&":a"), "Should find :a, got: {:?}", names);
2837        assert!(names.contains(&":d"), "Should find :d, got: {:?}", names);
2838        assert!(!names.contains(&":b"), "Inner :b should not be extracted");
2839        assert!(!names.contains(&":c"), "Inner :c should not be extracted");
2840        assert_eq!(names.len(), 2);
2841    }
2842
2843    #[test]
2844    #[cfg(feature = "lang-edn")]
2845    fn test_edn_non_map_top_level_forms_not_extracted() {
2846        // A bare vector at the top level has no meaningful name and yields no entities.
2847        let code = r#"["alpha" "beta"]"#;
2848        let plugin = CodeParserPlugin;
2849        let entities = plugin.extract_entities(code, "data.edn");
2850        assert_eq!(entities.len(), 0);
2851    }
2852
2853    #[test]
2854    #[cfg(feature = "lang-edn")]
2855    fn test_edn_symbol_keys_extracted() {
2856        let code = r#"{foo 1 bar 2}"#;
2857        let plugin = CodeParserPlugin;
2858        let entities = plugin.extract_entities(code, "sym.edn");
2859        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2860
2861        assert!(names.contains(&"foo"), "Should find foo, got: {:?}", names);
2862        assert!(names.contains(&"bar"), "Should find bar, got: {:?}", names);
2863    }
2864}