Skip to main content

sem_core/parser/plugins/code/
mod.rs

1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use languages::{get_all_code_extensions, get_language_config};
10use entity_extractor::extract_entities;
11
/// Parser plugin registered under the id `"code"`; it extracts semantic
/// entities from source files by parsing them with tree-sitter.
pub struct CodeParserPlugin;
13
// Thread-local parser cache: one Parser per language per thread.
// Avoids creating a new Parser for every file during parallel graph builds.
// Entries are keyed by the language config id (`config.id`). The RefCell
// supplies the mutable access needed to insert parsers and to call
// `Parser::parse`, which takes `&mut self`.
thread_local! {
    static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
}
19
20impl SemanticParserPlugin for CodeParserPlugin {
21    fn id(&self) -> &str {
22        "code"
23    }
24
25    fn extensions(&self) -> &[&str] {
26        get_all_code_extensions()
27    }
28
29    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
30        let ext = std::path::Path::new(file_path)
31            .extension()
32            .and_then(|e| e.to_str())
33            .map(|e| format!(".{}", e.to_lowercase()))
34            .unwrap_or_default();
35
36        let config = match get_language_config(&ext) {
37            Some(c) => c,
38            None => {
39                // Try shebang detection for extensionless files
40                match detect_ext_from_content(content)
41                    .and_then(|se| get_language_config(&se))
42                {
43                    Some(c) => c,
44                    None => return Vec::new(),
45                }
46            }
47        };
48
49        let language = match (config.get_language)() {
50            Some(lang) => lang,
51            None => return Vec::new(),
52        };
53
54        PARSER_CACHE.with(|cache| {
55            let mut cache = cache.borrow_mut();
56            let parser = cache.entry(config.id).or_insert_with(|| {
57                let mut p = tree_sitter::Parser::new();
58                let _ = p.set_language(&language);
59                p
60            });
61
62            let tree = match parser.parse(content.as_bytes(), None) {
63                Some(t) => t,
64                None => return Vec::new(),
65            };
66
67            extract_entities(&tree, file_path, config, content)
68        })
69    }
70}
71
72use crate::parser::registry::detect_ext_from_content;
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77
78    #[test]
79    fn test_java_entity_extraction() {
80        let code = r#"
81package com.example;
82
83import java.util.List;
84
85public class UserService {
86    private String name;
87
88    public UserService(String name) {
89        this.name = name;
90    }
91
92    public List<User> getUsers() {
93        return db.findAll();
94    }
95
96    public void createUser(User user) {
97        db.save(user);
98    }
99}
100
101interface Repository<T> {
102    T findById(String id);
103    List<T> findAll();
104}
105
106enum Status {
107    ACTIVE,
108    INACTIVE,
109    DELETED
110}
111"#;
112        let plugin = CodeParserPlugin;
113        let entities = plugin.extract_entities(code, "UserService.java");
114        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
115        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
116        eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
117
118        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
119        assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
120        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
121    }
122
123    #[test]
124    fn test_java_nested_methods() {
125        let code = r#"
126public class Calculator {
127    public int add(int a, int b) {
128        return a + b;
129    }
130
131    public int subtract(int a, int b) {
132        return a - b;
133    }
134}
135"#;
136        let plugin = CodeParserPlugin;
137        let entities = plugin.extract_entities(code, "Calculator.java");
138        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
139        eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
140
141        assert!(names.contains(&"Calculator"), "Should find Calculator class");
142        assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
143        assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
144
145        // Methods should have Calculator as parent
146        let add = entities.iter().find(|e| e.name == "add").unwrap();
147        assert!(add.parent_id.is_some(), "add should have parent_id");
148    }
149
150    #[test]
151    fn test_c_entity_extraction() {
152        let code = r#"
153#include <stdio.h>
154
155struct Point {
156    int x;
157    int y;
158};
159
160enum Color {
161    RED,
162    GREEN,
163    BLUE
164};
165
166typedef struct {
167    char name[50];
168    int age;
169} Person;
170
171void greet(const char* name) {
172    printf("Hello, %s!\n", name);
173}
174
175int add(int a, int b) {
176    return a + b;
177}
178
179int main() {
180    greet("world");
181    return 0;
182}
183"#;
184        let plugin = CodeParserPlugin;
185        let entities = plugin.extract_entities(code, "main.c");
186        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
187        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
188        eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
189
190        assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
191        assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
192        assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
193        assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
194        assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
195    }
196
197    #[test]
198    fn test_cpp_entity_extraction() {
199        let code = "namespace math {\nclass Vector3 {\npublic:\n    float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
200        let plugin = CodeParserPlugin;
201        let entities = plugin.extract_entities(code, "main.cpp");
202        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
203        assert!(names.contains(&"math"), "got: {:?}", names);
204        assert!(names.contains(&"Vector3"), "got: {:?}", names);
205        assert!(names.contains(&"greet"), "got: {:?}", names);
206    }
207
208    #[test]
209    fn test_ruby_entity_extraction() {
210        let code = "module Auth\n  class User\n    def greet\n      \"hi\"\n    end\n  end\nend\ndef helper(x)\n  x * 2\nend\n";
211        let plugin = CodeParserPlugin;
212        let entities = plugin.extract_entities(code, "auth.rb");
213        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
214        assert!(names.contains(&"Auth"), "got: {:?}", names);
215        assert!(names.contains(&"User"), "got: {:?}", names);
216        assert!(names.contains(&"helper"), "got: {:?}", names);
217    }
218
219    #[test]
220    fn test_csharp_entity_extraction() {
221        let code = "namespace MyApp {\npublic class User {\n    public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
222        let plugin = CodeParserPlugin;
223        let entities = plugin.extract_entities(code, "Models.cs");
224        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
225        assert!(names.contains(&"MyApp"), "got: {:?}", names);
226        assert!(names.contains(&"User"), "got: {:?}", names);
227        assert!(names.contains(&"Role"), "got: {:?}", names);
228    }
229
230    #[test]
231    fn test_swift_entity_extraction() {
232        let code = r#"
233import Foundation
234
235class UserService {
236    var name: String
237
238    init(name: String) {
239        self.name = name
240    }
241
242    func getUsers() -> [User] {
243        return db.findAll()
244    }
245}
246
247struct Point {
248    var x: Double
249    var y: Double
250}
251
252enum Status {
253    case active
254    case inactive
255    case deleted
256}
257
258protocol Repository {
259    associatedtype Item
260    func findById(id: String) -> Item?
261    func findAll() -> [Item]
262}
263
264func helper(x: Int) -> Int {
265    return x * 2
266}
267"#;
268        let plugin = CodeParserPlugin;
269        let entities = plugin.extract_entities(code, "UserService.swift");
270        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
271        eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
272
273        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
274        assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
275        assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
276        assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
277        assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
278    }
279
280    #[test]
281    fn test_elixir_entity_extraction() {
282        let code = r#"
283defmodule MyApp.Accounts do
284  def create_user(attrs) do
285    %User{}
286    |> User.changeset(attrs)
287    |> Repo.insert()
288  end
289
290  defp validate(attrs) do
291    # private helper
292    :ok
293  end
294
295  defmacro is_admin(user) do
296    quote do
297      unquote(user).role == :admin
298    end
299  end
300
301  defguard is_positive(x) when is_integer(x) and x > 0
302end
303
304defprotocol Printable do
305  def to_string(data)
306end
307
308defimpl Printable, for: Integer do
309  def to_string(i), do: Integer.to_string(i)
310end
311"#;
312        let plugin = CodeParserPlugin;
313        let entities = plugin.extract_entities(code, "accounts.ex");
314        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
315        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
316        eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
317
318        assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
319        assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
320        assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
321        assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
322        assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
323
324        // Verify nesting: create_user should have MyApp.Accounts as parent
325        let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
326        assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
327    }
328
329    #[test]
330    fn test_bash_entity_extraction() {
331        let code = r#"#!/bin/bash
332
333greet() {
334    echo "Hello, $1!"
335}
336
337function deploy {
338    echo "deploying..."
339}
340
341# not a function
342echo "main script"
343"#;
344        let plugin = CodeParserPlugin;
345        let entities = plugin.extract_entities(code, "deploy.sh");
346        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
347        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
348        eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
349
350        assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
351        assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
352        assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
353    }
354
355    #[test]
356    fn test_typescript_entity_extraction() {
357        // Existing language should still work
358        let code = r#"
359export function hello(): string {
360    return "hello";
361}
362
363export class Greeter {
364    greet(name: string): string {
365        return `Hello, ${name}!`;
366    }
367}
368"#;
369        let plugin = CodeParserPlugin;
370        let entities = plugin.extract_entities(code, "test.ts");
371        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
372        assert!(names.contains(&"hello"), "Should find hello function");
373        assert!(names.contains(&"Greeter"), "Should find Greeter class");
374    }
375
376    #[test]
377    fn test_module_typescript_entity_extraction() {
378        let code = r#"
379export function hello(): string {
380    return "hello";
381}
382"#;
383        let plugin = CodeParserPlugin;
384        let entities = plugin.extract_entities(code, "test.mts");
385        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
386
387        assert!(names.contains(&"hello"), "Should find hello function");
388    }
389
390    #[test]
391    fn test_commonjs_typescript_entity_extraction() {
392        let code = r#"
393export class Greeter {
394    greet(name: string): string {
395        return `Hello, ${name}!`;
396    }
397}
398"#;
399        let plugin = CodeParserPlugin;
400        let entities = plugin.extract_entities(code, "test.cts");
401        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
402
403        assert!(names.contains(&"Greeter"), "Should find Greeter class");
404        assert!(names.contains(&"greet"), "Should find greet method");
405    }
406
407    #[test]
408    fn test_typescript_generator_function_entity_extraction() {
409        let code = r#"
410export async function* streamUsers(): AsyncGenerator<string> {
411    yield "alice";
412}
413"#;
414        let plugin = CodeParserPlugin;
415        let entities = plugin.extract_entities(code, "stream.ts");
416        let stream = entities.iter().find(|e| e.name == "streamUsers");
417
418        assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
419        assert_eq!(stream.unwrap().entity_type, "function");
420    }
421
422    #[test]
423    fn test_javascript_generator_function_entity_extraction() {
424        let code = r#"
425export function* ids() {
426    yield 1;
427    yield 2;
428}
429"#;
430        let plugin = CodeParserPlugin;
431        let entities = plugin.extract_entities(code, "ids.js");
432        let ids = entities.iter().find(|e| e.name == "ids");
433
434        assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
435        assert_eq!(ids.unwrap().entity_type, "function");
436    }
437
438    #[test]
439    fn test_nested_functions_typescript() {
440        let code = r#"
441function outer() {
442    function inner() {
443        return 42;
444    }
445    return inner();
446}
447"#;
448        let plugin = CodeParserPlugin;
449        let entities = plugin.extract_entities(code, "nested.ts");
450        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
451        eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
452
453        assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
454        assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
455
456        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
457        assert!(inner.parent_id.is_some(), "inner should have parent_id");
458    }
459
460    #[test]
461    fn test_nested_functions_python() {
462        let code = "def outer():\n    def inner():\n        return 42\n    return inner()\n";
463        let plugin = CodeParserPlugin;
464        let entities = plugin.extract_entities(code, "nested.py");
465        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
466
467        assert!(names.contains(&"outer"), "got: {:?}", names);
468        assert!(names.contains(&"inner"), "got: {:?}", names);
469
470        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
471        assert!(inner.parent_id.is_some(), "inner should have parent_id");
472    }
473
474    #[test]
475    fn test_nested_functions_rust() {
476        let code = "fn outer() {\n    fn inner() -> i32 {\n        42\n    }\n    inner();\n}\n";
477        let plugin = CodeParserPlugin;
478        let entities = plugin.extract_entities(code, "nested.rs");
479        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
480
481        assert!(names.contains(&"outer"), "got: {:?}", names);
482        assert!(names.contains(&"inner"), "got: {:?}", names);
483
484        let inner = entities.iter().find(|e| e.name == "inner").unwrap();
485        assert!(inner.parent_id.is_some(), "inner should have parent_id");
486    }
487
488    #[test]
489    fn test_rust_impl_blocks_unique_names() {
490        let code = r#"
491trait Greeting {
492    fn greet(&self) -> String;
493}
494
495struct Person;
496struct Robot;
497struct Cat;
498
499impl Greeting for Person {
500    fn greet(&self) -> String { "Hello".to_string() }
501}
502
503impl Greeting for Robot {
504    fn greet(&self) -> String { "Beep".to_string() }
505}
506
507impl Greeting for Cat {
508    fn greet(&self) -> String { "Meow".to_string() }
509}
510"#;
511        let plugin = CodeParserPlugin;
512        let entities = plugin.extract_entities(code, "impls.rs");
513        let impl_entities: Vec<&_> = entities.iter()
514            .filter(|e| e.entity_type == "impl")
515            .collect();
516        let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
517
518        assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
519        assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
520        assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
521        assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
522    }
523
524    #[test]
525    fn test_nested_functions_go() {
526        // Go doesn't have named nested functions, but has nested type/var declarations
527        let code = "package main\n\nfunc outer() {\n    var x int = 42\n    _ = x\n}\n";
528        let plugin = CodeParserPlugin;
529        let entities = plugin.extract_entities(code, "nested.go");
530        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
531
532        assert!(names.contains(&"outer"), "got: {:?}", names);
533    }
534
535    #[test]
536    fn test_renamed_function_same_structural_hash() {
537        let code_a = "def get_card():\n    return db.query('cards')\n";
538        let code_b = "def get_card_1():\n    return db.query('cards')\n";
539
540        let plugin = CodeParserPlugin;
541        let entities_a = plugin.extract_entities(code_a, "a.py");
542        let entities_b = plugin.extract_entities(code_b, "b.py");
543
544        assert_eq!(entities_a.len(), 1, "Should find one entity in a");
545        assert_eq!(entities_b.len(), 1, "Should find one entity in b");
546        assert_eq!(entities_a[0].name, "get_card");
547        assert_eq!(entities_b[0].name, "get_card_1");
548
549        // Structural hash should match since only the name differs
550        assert_eq!(
551            entities_a[0].structural_hash, entities_b[0].structural_hash,
552            "Renamed function with identical body should have same structural_hash"
553        );
554
555        // Content hash should differ (it includes the name)
556        assert_ne!(
557            entities_a[0].content_hash, entities_b[0].content_hash,
558            "Content hash should differ since raw content includes the name"
559        );
560    }
561
562    #[test]
563    fn test_hcl_entity_extraction() {
564        let code = r#"
565region = "eu-west-1"
566
567variable "image_id" {
568  type = string
569}
570
571resource "aws_instance" "web" {
572  ami = var.image_id
573
574  lifecycle {
575    create_before_destroy = true
576  }
577}
578"#;
579        let plugin = CodeParserPlugin;
580        let entities = plugin.extract_entities(code, "main.tf");
581        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
582        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
583        eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
584
585        assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
586        assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
587        assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
588        assert!(
589            names.contains(&"resource.aws_instance.web.lifecycle"),
590            "Should find nested lifecycle block with qualified name, got: {:?}",
591            names
592        );
593        assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
594        assert!(
595            !names.contains(&"create_before_destroy"),
596            "Should skip nested attributes inside nested blocks, got: {:?}",
597            names
598        );
599
600        let lifecycle = entities
601            .iter()
602            .find(|e| e.name == "resource.aws_instance.web.lifecycle")
603            .unwrap();
604        assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
605        assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
606    }
607
608    #[test]
609    fn test_kotlin_entity_extraction() {
610        let code = r#"
611class UserService {
612    val name: String = ""
613
614    fun greet(): String {
615        return "Hello, $name"
616    }
617
618    companion object {
619        fun create(): UserService = UserService()
620    }
621}
622
623interface Repository {
624    fun findById(id: Int): Any?
625}
626
627object AppConfig {
628    val version = "1.0"
629}
630
631fun topLevel(x: Int): Int = x * 2
632"#;
633        let plugin = CodeParserPlugin;
634        let entities = plugin.extract_entities(code, "App.kt");
635        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
636        eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
637        assert!(names.contains(&"UserService"), "got: {:?}", names);
638        assert!(names.contains(&"greet"), "got: {:?}", names);
639        assert!(names.contains(&"Repository"), "got: {:?}", names);
640        assert!(names.contains(&"findById"), "got: {:?}", names);
641        assert!(names.contains(&"AppConfig"), "got: {:?}", names);
642        assert!(names.contains(&"topLevel"), "got: {:?}", names);
643    }
644
645    #[test]
646    fn test_xml_entity_extraction() {
647        let code = r#"<?xml version="1.0" encoding="UTF-8"?>
648<project>
649    <groupId>com.example</groupId>
650    <artifactId>my-app</artifactId>
651    <dependencies>
652        <dependency>
653            <groupId>junit</groupId>
654            <artifactId>junit</artifactId>
655        </dependency>
656    </dependencies>
657    <build>
658        <plugins>
659            <plugin>
660                <groupId>org.apache.maven</groupId>
661            </plugin>
662        </plugins>
663    </build>
664</project>
665"#;
666        let plugin = CodeParserPlugin;
667        let entities = plugin.extract_entities(code, "pom.xml");
668        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
669        eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
670        assert!(names.contains(&"project"), "got: {:?}", names);
671        assert!(names.contains(&"dependencies"), "got: {:?}", names);
672        assert!(names.contains(&"build"), "got: {:?}", names);
673    }
674
675    #[test]
676    fn test_arrow_callback_scope_boundary_typescript() {
677        // Arrow function callbacks: locals are suppressed, but inner
678        // class/function declarations are still extracted. Nested callbacks
679        // also suppress their locals.
680        let code = r#"
681const activeQueues = [
682  { queue: queues.fooQueue, processor: foo.process },
683];
684
685activeQueues.forEach((handler: any) => {
686  const queue = handler.queue;
687  let retries = 0;
688
689  class QueueHandler {
690    handle() { return queue; }
691  }
692
693  function createHandler() {
694    return new QueueHandler();
695  }
696
697  queue.process((job) => {
698    const orderId = job.data.orderId;
699    return orderId;
700  });
701});
702
703function handleFailure(job: any, err: any) {
704  console.error('failed', err);
705}
706"#;
707        let plugin = CodeParserPlugin;
708        let entities = plugin.extract_entities(code, "process.ts");
709        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
710        let top_level: Vec<&str> = entities
711            .iter()
712            .filter(|e| e.parent_id.is_none())
713            .map(|e| e.name.as_str())
714            .collect();
715
716        // Top-level entities preserved
717        assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
718        assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
719
720        // Declarations inside callback extracted
721        assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
722        assert!(names.contains(&"handle"), "got: {:?}", names);
723        assert!(names.contains(&"createHandler"), "got: {:?}", names);
724
725        // Locals inside callbacks suppressed
726        assert!(!names.contains(&"queue"), "got: {:?}", names);
727        assert!(!names.contains(&"retries"), "got: {:?}", names);
728        assert!(!names.contains(&"orderId"), "got: {:?}", names);
729    }
730
731    #[test]
732    fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
733        let code = r#"
734function factory() {
735  class Foo {
736    method(): number {
737      return 1;
738    }
739  }
740
741  function bar(): Foo {
742    return new Foo();
743  }
744}
745
746factory();
747"#;
748        let plugin = CodeParserPlugin;
749        let entities = plugin.extract_entities(code, "wrapped.ts");
750        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
751        assert!(
752            names.contains(&"factory"),
753            "Should find top-level wrapper function, got: {:?}",
754            names
755        );
756        assert!(
757            names.contains(&"Foo"),
758            "Should find class inside top-level wrapper, got: {:?}",
759            names
760        );
761        assert!(
762            names.contains(&"bar"),
763            "Should find function inside top-level wrapper, got: {:?}",
764            names
765        );
766    }
767
768    #[test]
769    fn test_top_level_iife_still_extracts_typescript_entities() {
770        let code = r#"
771(() => {
772  class Foo {
773    method(): number {
774      return 1;
775    }
776  }
777
778  function bar(): Foo {
779    return new Foo();
780  }
781})();
782"#;
783        let plugin = CodeParserPlugin;
784        let entities = plugin.extract_entities(code, "iife.ts");
785        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
786        assert!(
787            names.contains(&"Foo"),
788            "Should find class inside top-level IIFE, got: {:?}",
789            names
790        );
791        assert!(
792            names.contains(&"bar"),
793            "Should find function inside top-level IIFE, got: {:?}",
794            names
795        );
796    }
797
798    #[test]
799    fn test_function_locals_not_extracted_as_nested_entities_typescript() {
800        let code = r#"
801export default function foo() {
802  const x = 1;
803  return x;
804}
805"#;
806        let plugin = CodeParserPlugin;
807        let entities = plugin.extract_entities(code, "default-export.ts");
808        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
809        assert!(
810            names.contains(&"foo"),
811            "Should find exported function, got: {:?}",
812            names
813        );
814        assert!(
815            !names.contains(&"x"),
816            "Local inside function should not be extracted as an entity, got: {:?}",
817            names
818        );
819    }
820
821    #[test]
822    fn test_function_expression_scope_boundary_typescript() {
823        // Function expressions: assigned to variables, or used as callback
824        // arguments. Locals are suppressed in all cases.
825        let code = r#"
826const foo = function namedExpr(x: number) {
827  const inner = x + 1;
828  return inner;
829};
830
831const bar = function(y: number) {
832  const local = y * 2;
833  return local;
834};
835
836const items = [1, 2, 3];
837
838items.forEach(function process(item) {
839  const doubled = item * 2;
840  console.log(doubled);
841});
842"#;
843        let plugin = CodeParserPlugin;
844        let entities = plugin.extract_entities(code, "funexpr.ts");
845        let top_level: Vec<&str> = entities
846            .iter()
847            .filter(|e| e.parent_id.is_none())
848            .map(|e| e.name.as_str())
849            .collect();
850        let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
851        let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
852
853        // Top-level declarations preserved, and const-assigned function
854        // expressions are promoted from variable to function.
855        assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
856        assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
857        assert!(top_level.contains(&"items"), "got: {:?}", top_level);
858        assert_eq!(find("foo").entity_type, "function");
859        assert_eq!(find("bar").entity_type, "function");
860        assert_eq!(find("items").entity_type, "variable");
861
862        // Locals inside function expressions suppressed
863        assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
864        assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
865        assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
866
867        // Named function expression used as callback argument not extracted
868        assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
869    }
870
871    #[test]
872    fn test_variable_assigned_arrow_extracts_inner_entities() {
873        // Arrow function assigned to a variable: inner class/function
874        // declarations should be extracted, locals should be suppressed.
875        let code = r#"
876const handler = () => {
877  class Inner {
878    run() { return 1; }
879  }
880
881  function make() {
882    return new Inner();
883  }
884
885  const local = 42;
886};
887"#;
888        let plugin = CodeParserPlugin;
889        let entities = plugin.extract_entities(code, "assigned.ts");
890        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
891        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
892
893        assert_eq!(handler.entity_type, "function");
894        assert!(names.contains(&"handler"), "got: {:?}", names);
895        assert!(names.contains(&"Inner"), "got: {:?}", names);
896        assert!(names.contains(&"run"), "got: {:?}", names);
897        assert!(names.contains(&"make"), "got: {:?}", names);
898        assert!(!names.contains(&"local"), "got: {:?}", names);
899    }
900
901    #[test]
902    fn test_variable_assigned_function_expression_extracts_inner_entities() {
903        // Function expression assigned to a variable: same behavior.
904        let code = r#"
905const handler = function() {
906  class Inner {}
907  function make() { return new Inner(); }
908  const local = 42;
909};
910"#;
911        let plugin = CodeParserPlugin;
912        let entities = plugin.extract_entities(code, "funexpr-inner.ts");
913        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
914        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
915
916        assert_eq!(handler.entity_type, "function");
917        assert!(names.contains(&"handler"), "got: {:?}", names);
918        assert!(names.contains(&"Inner"), "got: {:?}", names);
919        assert!(names.contains(&"make"), "got: {:?}", names);
920        assert!(!names.contains(&"local"), "got: {:?}", names);
921    }
922
923    #[test]
924    fn test_let_assigned_arrow_stays_variable_typescript() {
925        let code = r#"
926let handler = () => {
927  return 42;
928};
929"#;
930        let plugin = CodeParserPlugin;
931        let entities = plugin.extract_entities(code, "let-assigned.ts");
932        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
933
934        assert_eq!(handler.entity_type, "variable");
935    }
936
937    #[test]
938    fn test_const_assigned_arrow_promoted_to_function_javascript() {
939        let code = r#"
940const handler = () => {
941  return 42;
942};
943"#;
944        let plugin = CodeParserPlugin;
945        let entities = plugin.extract_entities(code, "handler.js");
946        let handler = entities.iter().find(|e| e.name == "handler").unwrap();
947
948        assert_eq!(handler.entity_type, "function");
949    }
950
951    #[test]
952    fn test_go_var_declaration() {
953        let code = r#"package featuremgmt
954
955type FeatureFlag struct {
956	Name        string
957	Description string
958	Stage       string
959}
960
961var standardFeatureFlags = []FeatureFlag{
962	{
963		Name:        "panelTitleSearch",
964		Description: "Search for dashboards using panel title",
965		Stage:       "PublicPreview",
966	},
967}
968
969func GetFlags() []FeatureFlag {
970	return standardFeatureFlags
971}
972"#;
973        let plugin = CodeParserPlugin;
974        let entities = plugin.extract_entities(code, "flags.go");
975        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
976        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
977        eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
978
979        assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
980        assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
981        assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
982    }
983
984    #[test]
985    fn test_go_grouped_var_declaration() {
986        let code = r#"package test
987
988var (
989	simple = 42
990	flags = []string{"a", "b"}
991)
992
993const (
994	x = 1
995	y = 2
996)
997
998func main() {}
999"#;
1000        let plugin = CodeParserPlugin;
1001        let entities = plugin.extract_entities(code, "test.go");
1002        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1003        let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1004        eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1005
1006        assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1007        assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1008        assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1009    }
1010
1011    #[test]
1012    fn test_dart_entity_extraction() {
1013        let code = r#"
1014import 'dart:math';
1015
1016class Calculator {
1017  final String name;
1018
1019  Calculator(this.name);
1020
1021  Calculator.withDefault() : name = 'default';
1022
1023  factory Calculator.create(String name) {
1024    return Calculator(name);
1025  }
1026
1027  int add(int a, int b) {
1028    return a + b;
1029  }
1030
1031  int get doubleAdd => add(1, 1) * 2;
1032
1033  set label(String value) {
1034    // no-op
1035  }
1036
1037  int operator +(Calculator other) {
1038    return 0;
1039  }
1040}
1041
1042mixin Loggable {
1043  void log(String message) {
1044    print(message);
1045  }
1046}
1047
1048extension StringExt on String {
1049  bool get isBlank => trim().isEmpty;
1050}
1051
1052enum Status {
1053  active,
1054  inactive;
1055
1056  String display() => name.toUpperCase();
1057}
1058
1059typedef Callback = void Function(int);
1060
1061int add(int a, int b) {
1062  return a + b;
1063}
1064
1065extension type Wrapper(int value) implements int {}
1066"#;
1067        let plugin = CodeParserPlugin;
1068        let entities = plugin.extract_entities(code, "calculator.dart");
1069        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1070        eprintln!(
1071            "Dart entities: {:?}",
1072            entities
1073                .iter()
1074                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1075                .collect::<Vec<_>>()
1076        );
1077
1078        // Top-level declarations
1079        assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
1080        assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
1081        assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
1082        assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
1083        assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
1084        assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
1085        assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
1086
1087        // Class members with correct types
1088        let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
1089        assert!(add_method.is_some(), "Should find add method inside Calculator");
1090        assert_eq!(add_method.unwrap().entity_type, "method");
1091
1092        // Named constructor gets distinct name from unnamed constructor
1093        let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
1094        assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
1095        let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
1096        assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
1097        assert_eq!(named_ctor.unwrap().entity_type, "constructor");
1098        assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
1099
1100        // Factory constructor
1101        let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
1102        assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
1103        assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
1104
1105        // Getter, setter, operator
1106        let getter = entities.iter().find(|e| e.name == "doubleAdd");
1107        assert!(getter.is_some(), "Should find getter doubleAdd");
1108        assert_eq!(getter.unwrap().entity_type, "getter");
1109
1110        let setter = entities.iter().find(|e| e.name == "label");
1111        assert!(setter.is_some(), "Should find setter label");
1112        assert_eq!(setter.unwrap().entity_type, "setter");
1113
1114        let operator = entities.iter().find(|e| e.name == "operator +");
1115        assert!(operator.is_some(), "Should find operator +");
1116        assert_eq!(operator.unwrap().entity_type, "method");
1117
1118        // Mixin members have parent
1119        let log_method = entities.iter().find(|e| e.name == "log");
1120        assert!(log_method.is_some(), "Should find log in Loggable");
1121        assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
1122
1123        // Entity type mapping
1124        let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
1125        assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
1126
1127        let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
1128        assert_eq!(loggable.entity_type, "mixin");
1129
1130        let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
1131        assert_eq!(ext.entity_type, "extension");
1132
1133        let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
1134        assert_eq!(wrapper.entity_type, "extension");
1135    }
1136
1137    #[test]
1138    fn test_dart_top_level_function_includes_body() {
1139        let code = r#"
1140int add(int a, int b) {
1141  return a + b;
1142}
1143
1144String greet(String name) => 'Hello, $name!';
1145"#;
1146        let plugin = CodeParserPlugin;
1147        let entities = plugin.extract_entities(code, "funcs.dart");
1148        eprintln!(
1149            "Dart top-level: {:?}",
1150            entities
1151                .iter()
1152                .map(|e| (&e.name, &e.entity_type, &e.content))
1153                .collect::<Vec<_>>()
1154        );
1155
1156        let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
1157        assert!(
1158            add_fn.content.contains("return a + b"),
1159            "Top-level function content should include the body, got: {:?}",
1160            add_fn.content
1161        );
1162
1163        let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
1164        assert!(
1165            greet_fn.content.contains("Hello"),
1166            "Expression body should be included, got: {:?}",
1167            greet_fn.content
1168        );
1169
1170        // Body changes should produce different content_hash
1171        let code_v2 = r#"
1172int add(int a, int b) {
1173  return a * b;
1174}
1175
1176String greet(String name) => 'Hello, $name!';
1177"#;
1178        let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
1179        let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
1180        assert_ne!(
1181            add_fn.content_hash, add_v2.content_hash,
1182            "Body change should produce different content_hash"
1183        );
1184
1185        // Unchanged function should keep the same hash
1186        let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
1187        assert_eq!(
1188            greet_fn.content_hash, greet_v2.content_hash,
1189            "Unchanged function should keep the same content_hash"
1190        );
1191    }
1192
1193    #[test]
1194    fn test_dart_renamed_named_constructor_same_structural_hash() {
1195        let code_a = r#"
1196class Foo {
1197  Foo.fromJson(Map<String, dynamic> json) {
1198    print(json);
1199  }
1200}
1201"#;
1202        let code_b = r#"
1203class Foo {
1204  Foo.fromMap(Map<String, dynamic> json) {
1205    print(json);
1206  }
1207}
1208"#;
1209        let plugin = CodeParserPlugin;
1210        let entities_a = plugin.extract_entities(code_a, "a.dart");
1211        let entities_b = plugin.extract_entities(code_b, "b.dart");
1212
1213        let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
1214        let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
1215
1216        assert_eq!(
1217            ctor_a.structural_hash, ctor_b.structural_hash,
1218            "Renamed named constructor with identical body should have same structural_hash"
1219        );
1220        assert_ne!(
1221            ctor_a.content_hash, ctor_b.content_hash,
1222            "Content hash should differ since raw content includes the name"
1223        );
1224    }
1225
1226    #[test]
1227    fn test_dart_top_level_getter_setter() {
1228        let code = r#"
1229int _value = 0;
1230
1231int get currentValue {
1232  return _value;
1233}
1234
1235set currentValue(int v) {
1236  _value = v;
1237}
1238"#;
1239        let plugin = CodeParserPlugin;
1240        let entities = plugin.extract_entities(code, "accessors.dart");
1241        eprintln!(
1242            "Dart top-level accessors: {:?}",
1243            entities
1244                .iter()
1245                .map(|e| (&e.name, &e.entity_type, &e.content))
1246                .collect::<Vec<_>>()
1247        );
1248
1249        let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
1250        assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
1251            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1252        assert!(
1253            getter.unwrap().content.contains("return _value"),
1254            "Top-level getter content should include the body"
1255        );
1256        assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
1257
1258        // tree-sitter-dart 0.1.0 parses top-level setters as function_signature
1259        // (treating `set` as a type_identifier). setter_signature is only
1260        // produced inside class_member → method_signature.
1261        let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
1262        assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
1263            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1264        assert!(
1265            setter.unwrap().content.contains("_value = v"),
1266            "Top-level setter content should include the body"
1267        );
1268    }
1269
1270    #[test]
1271    fn test_dart_field_entity_type() {
1272        let code = r#"
1273class Config {
1274  final String name;
1275  static const int maxRetries = 3;
1276}
1277"#;
1278        let plugin = CodeParserPlugin;
1279        let entities = plugin.extract_entities(code, "config.dart");
1280        eprintln!(
1281            "Dart fields: {:?}",
1282            entities
1283                .iter()
1284                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1285                .collect::<Vec<_>>()
1286        );
1287
1288        let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
1289        assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
1290            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1291        assert_eq!(name_field.unwrap().entity_type, "field");
1292
1293        let max_retries = entities.iter().find(|e| e.name == "maxRetries");
1294        assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
1295            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1296        assert_eq!(max_retries.unwrap().entity_type, "field");
1297    }
1298
1299    #[test]
1300    fn test_dart_identifier_list_fields() {
1301        // identifier_list produces bare identifier children (no "name" field),
1302        // unlike initialized_identifier_list which wraps each in an
1303        // initialized_identifier node with a "name" field.
1304        let code = r#"
1305abstract class Shape {
1306  abstract double x, y;
1307  abstract String label;
1308}
1309"#;
1310        let plugin = CodeParserPlugin;
1311        let entities = plugin.extract_entities(code, "shape.dart");
1312        eprintln!(
1313            "Dart identifier_list fields: {:?}",
1314            entities
1315                .iter()
1316                .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1317                .collect::<Vec<_>>()
1318        );
1319
1320        let x_field = entities.iter().find(|e| e.name == "x");
1321        assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
1322            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1323        assert_eq!(x_field.unwrap().entity_type, "field");
1324        assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
1325
1326        let label_field = entities.iter().find(|e| e.name == "label");
1327        assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
1328            entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1329        assert_eq!(label_field.unwrap().entity_type, "field");
1330    }
1331
1332    #[test]
1333    fn test_ocaml_entity_extraction() {
1334        let code = r#"
1335type color = Red | Green | Blue
1336
1337type point = {
1338  x : float;
1339  y : float;
1340}
1341
1342exception Not_found of string
1343
1344let greet name =
1345  Printf.printf "Hello, %s!\n" name
1346
1347let add a b = a + b
1348
1349let version = "1.0"
1350
1351let color_to_string = function
1352  | Red -> "red"
1353  | Blue -> "blue"
1354
1355let inc = fun x -> x + 1
1356
1357module MyModule = struct
1358  let helper x = x * 2
1359end
1360
1361module type Printable = sig
1362  val to_string : 'a -> string
1363end
1364
1365external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
1366
1367class point_class x_init = object
1368  val mutable x = x_init
1369  method get_x = x
1370end
1371
1372class type measurable = object
1373  method measure : float
1374end
1375"#;
1376        let plugin = CodeParserPlugin;
1377        let entities = plugin.extract_entities(code, "example.ml");
1378        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1379        eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1380
1381        let find = |name: &str| entities.iter().find(|e| e.name == name)
1382            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1383
1384        assert_eq!(find("color").entity_type, "type");
1385        assert_eq!(find("point").entity_type, "type");
1386        assert_eq!(find("Not_found").entity_type, "exception");
1387        assert_eq!(find("greet").entity_type, "function");
1388        assert_eq!(find("add").entity_type, "function");
1389        assert_eq!(find("version").entity_type, "value");
1390        assert_eq!(find("color_to_string").entity_type, "function");
1391        assert_eq!(find("inc").entity_type, "function");
1392        assert_eq!(find("MyModule").entity_type, "module");
1393        assert_eq!(find("Printable").entity_type, "module_type");
1394        assert_eq!(find("caml_input").entity_type, "external");
1395        assert_eq!(find("point_class").entity_type, "class");
1396        assert_eq!(find("measurable").entity_type, "class_type");
1397    }
1398
1399    #[test]
1400    fn test_ocaml_nested_module_entities() {
1401        let code = r#"
1402module Outer = struct
1403  let x = 42
1404
1405  module Inner = struct
1406    let y = 0
1407  end
1408end
1409"#;
1410        let plugin = CodeParserPlugin;
1411        let entities = plugin.extract_entities(code, "nested.ml");
1412        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1413        eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1414
1415        let find = |name: &str| entities.iter().find(|e| e.name == name)
1416            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1417
1418        let outer = find("Outer");
1419        let x = find("x");
1420        let inner = find("Inner");
1421        let y = find("y");
1422
1423        assert_eq!(outer.entity_type, "module");
1424        assert_eq!(x.entity_type, "value");
1425        assert_eq!(inner.entity_type, "module");
1426        assert_eq!(y.entity_type, "value");
1427
1428        assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
1429        assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
1430        assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
1431    }
1432
1433    #[test]
1434    fn test_ocaml_interface_entity_extraction() {
1435        let code = r#"
1436type t
1437
1438val create : string -> t
1439val to_string : t -> string
1440
1441exception Invalid_input of string
1442
1443module type Serializable = sig
1444  val serialize : t -> string
1445end
1446"#;
1447        let plugin = CodeParserPlugin;
1448        let entities = plugin.extract_entities(code, "example.mli");
1449        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1450        eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1451
1452        let find = |name: &str| entities.iter().find(|e| e.name == name)
1453            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1454
1455        assert_eq!(find("t").entity_type, "type");
1456        assert_eq!(find("create").entity_type, "val");
1457        assert_eq!(find("to_string").entity_type, "val");
1458        assert_eq!(find("Invalid_input").entity_type, "exception");
1459        assert_eq!(find("Serializable").entity_type, "module_type");
1460    }
1461
1462    #[test]
1463    fn test_ocaml_mutual_recursion_let() {
1464        let code = r#"
1465let rec even n = (n = 0) || odd (n - 1)
1466and odd n = (n <> 0) && even (n - 1)
1467
1468let rec ping x = pong (x - 1)
1469and pong x = if x <= 0 then 0 else ping (x - 1)
1470"#;
1471        let plugin = CodeParserPlugin;
1472        let entities = plugin.extract_entities(code, "mutual.ml");
1473        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1474        eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1475
1476        let find = |name: &str| entities.iter().find(|e| e.name == name)
1477            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1478
1479        assert_eq!(find("even").entity_type, "function");
1480        assert_eq!(find("odd").entity_type, "function");
1481        assert_eq!(find("ping").entity_type, "function");
1482        assert_eq!(find("pong").entity_type, "function");
1483    }
1484
1485    #[test]
1486    fn test_ocaml_mutual_recursion_module() {
1487        let code = r#"
1488module rec A : sig val x : int end = struct
1489  let x = B.y + 1
1490end
1491and B : sig val y : int end = struct
1492  let y = 0
1493end
1494"#;
1495        let plugin = CodeParserPlugin;
1496        let entities = plugin.extract_entities(code, "mutual_mod.ml");
1497        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1498        eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1499
1500        let find = |name: &str| entities.iter().find(|e| e.name == name)
1501            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1502
1503        let a = find("A");
1504        let b = find("B");
1505        assert_eq!(a.entity_type, "module");
1506        assert_eq!(b.entity_type, "module");
1507
1508        let x = find("x");
1509        let y = find("y");
1510        assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
1511        assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
1512    }
1513
1514    #[test]
1515    fn test_ocaml_destructured_let() {
1516        let code = r#"
1517let (a, b) = (1, 2)
1518
1519let { x; y } = point
1520
1521let simple = 42
1522"#;
1523        let plugin = CodeParserPlugin;
1524        let entities = plugin.extract_entities(code, "destruct.ml");
1525        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1526        eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1527
1528        let find = |name: &str| entities.iter().find(|e| e.name == name)
1529            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1530
1531        assert_eq!(find("a").entity_type, "value");
1532        assert_eq!(find("b").entity_type, "value");
1533        assert_eq!(find("x").entity_type, "value");
1534        assert_eq!(find("y").entity_type, "value");
1535        assert_eq!(find("simple").entity_type, "value");
1536    }
1537
1538    #[test]
1539    fn test_ocaml_mutual_recursion_class() {
1540        let code = r#"
1541class foo = object
1542  method x = 1
1543end
1544and bar = object
1545  method y = 2
1546end
1547"#;
1548        let plugin = CodeParserPlugin;
1549        let entities = plugin.extract_entities(code, "classes.ml");
1550        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1551        eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1552
1553        let find = |name: &str| entities.iter().find(|e| e.name == name)
1554            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1555
1556        assert_eq!(find("foo").entity_type, "class");
1557        assert_eq!(find("bar").entity_type, "class");
1558    }
1559
1560    #[test]
1561    fn test_perl_entity_extraction() {
1562        let code = r#"package Foo::Bar;
1563
1564use strict;
1565use warnings;
1566
1567sub hello {
1568    my ($self, $name) = @_;
1569    print "Hello, $name!\n";
1570}
1571
1572sub _private_helper {
1573    return 42;
1574}
1575
15761;
1577"#;
1578        let plugin = CodeParserPlugin;
1579        let entities = plugin.extract_entities(code, "Foo/Bar.pm");
1580        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1581
1582        assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
1583        assert!(names.contains(&"hello"), "got: {:?}", names);
1584        assert!(names.contains(&"_private_helper"), "got: {:?}", names);
1585
1586        let find = |name: &str| entities.iter().find(|e| e.name == name)
1587            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1588
1589        assert_eq!(find("Foo::Bar").entity_type, "package");
1590        assert_eq!(find("hello").entity_type, "function");
1591        assert_eq!(find("_private_helper").entity_type, "function");
1592    }
1593
1594    #[test]
1595    fn test_fortran_entity_extraction() {
1596        let code = r#"module math_utils
1597  implicit none
1598contains
1599  function add(a, b) result(c)
1600    integer, intent(in) :: a, b
1601    integer :: c
1602    c = a + b
1603  end function add
1604
1605  subroutine greet()
1606    print *, "hello"
1607  end subroutine greet
1608end module math_utils
1609
1610program main
1611  implicit none
1612  print *, "hello"
1613end program main
1614"#;
1615        let plugin = CodeParserPlugin;
1616        let entities = plugin.extract_entities(code, "test.f90");
1617        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1618
1619        assert!(names.contains(&"math_utils"), "got: {:?}", names);
1620        assert!(names.contains(&"add"), "got: {:?}", names);
1621        assert!(names.contains(&"greet"), "got: {:?}", names);
1622        assert!(names.contains(&"main"), "got: {:?}", names);
1623
1624        let find = |name: &str| entities.iter().find(|e| e.name == name)
1625            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1626
1627        assert_eq!(find("math_utils").entity_type, "module");
1628        assert_eq!(find("add").entity_type, "function");
1629        assert_eq!(find("greet").entity_type, "subroutine");
1630        assert_eq!(find("main").entity_type, "program");
1631
1632        // Nested entities have parent
1633        assert!(find("add").parent_id.is_some());
1634        assert!(find("greet").parent_id.is_some());
1635    }
1636
1637    #[test]
1638    fn test_scala_entity_extraction() {
1639        let code = r#"
1640package com.example
1641
1642import scala.collection.mutable
1643
1644class UserService(val name: String) {
1645  def getUsers(): List[User] = db.findAll()
1646
1647  def createUser(user: User): Unit = db.save(user)
1648
1649  private def validate(user: User): Boolean = true
1650}
1651
1652object UserService {
1653  def apply(name: String): UserService = new UserService(name)
1654
1655  val DefaultName: String = "default"
1656}
1657
1658trait Repository[T] {
1659  def findById(id: String): Option[T]
1660  def findAll(): List[T]
1661}
1662
1663case class User(id: String, name: String)
1664
1665type UserId = String
1666"#;
1667        let plugin = CodeParserPlugin;
1668        let entities = plugin.extract_entities(code, "UserService.scala");
1669        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1670        eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1671
1672        assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
1673        assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
1674        assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
1675        assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
1676
1677        // Methods should be nested under class
1678        let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
1679        assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
1680    }
1681
1682    #[test]
1683    fn test_scala3_entity_extraction() {
1684        let code = r#"
1685package com.example
1686
1687enum Color:
1688  case Red, Green, Blue
1689
1690enum Planet(mass: Double, radius: Double):
1691  case Mercury extends Planet(3.303e+23, 2.4397e6)
1692  case Venus   extends Planet(4.869e+24, 6.0518e6)
1693
1694object Main:
1695  def main(args: Array[String]): Unit =
1696    println("Hello, World!")
1697
1698trait Greeter:
1699  def greet(name: String): String
1700
1701given Greeter with
1702  def greet(name: String): String = s"Hello, $name!"
1703
1704extension (s: String)
1705  def shout: String = s.toUpperCase + "!"
1706
1707type Predicate[A] = A => Boolean
1708"#;
1709        let plugin = CodeParserPlugin;
1710        let entities = plugin.extract_entities(code, "Main.scala");
1711        let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1712        eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1713
1714        assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
1715        assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
1716        assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
1717        assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
1718        assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
1719    }
1720}