1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use languages::{get_all_code_extensions, get_language_config};
10use entity_extractor::extract_entities;
11
/// Parser plugin for source-code files.
///
/// The type carries no state of its own; all behaviour lives in its
/// `SemanticParserPlugin` implementation.
pub struct CodeParserPlugin;
13
14thread_local! {
17 static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
18}
19
20impl SemanticParserPlugin for CodeParserPlugin {
21 fn id(&self) -> &str {
22 "code"
23 }
24
25 fn extensions(&self) -> &[&str] {
26 get_all_code_extensions()
27 }
28
29 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
30 let ext = std::path::Path::new(file_path)
31 .extension()
32 .and_then(|e| e.to_str())
33 .map(|e| format!(".{}", e.to_lowercase()))
34 .unwrap_or_default();
35
36 let config = match get_language_config(&ext) {
37 Some(c) => c,
38 None => {
39 match detect_ext_from_content(content)
41 .and_then(|se| get_language_config(&se))
42 {
43 Some(c) => c,
44 None => return Vec::new(),
45 }
46 }
47 };
48
49 let language = match (config.get_language)() {
50 Some(lang) => lang,
51 None => return Vec::new(),
52 };
53
54 PARSER_CACHE.with(|cache| {
55 let mut cache = cache.borrow_mut();
56 let parser = cache.entry(config.id).or_insert_with(|| {
57 let mut p = tree_sitter::Parser::new();
58 let _ = p.set_language(&language);
59 p
60 });
61
62 let tree = match parser.parse(content.as_bytes(), None) {
63 Some(t) => t,
64 None => return Vec::new(),
65 };
66
67 extract_entities(&tree, file_path, config, content)
68 })
69 }
70}
71
72use crate::parser::registry::detect_ext_from_content;
73
74#[cfg(test)]
75mod tests {
76 use super::*;
77
78 #[test]
79 fn test_java_entity_extraction() {
80 let code = r#"
81package com.example;
82
83import java.util.List;
84
85public class UserService {
86 private String name;
87
88 public UserService(String name) {
89 this.name = name;
90 }
91
92 public List<User> getUsers() {
93 return db.findAll();
94 }
95
96 public void createUser(User user) {
97 db.save(user);
98 }
99}
100
101interface Repository<T> {
102 T findById(String id);
103 List<T> findAll();
104}
105
106enum Status {
107 ACTIVE,
108 INACTIVE,
109 DELETED
110}
111"#;
112 let plugin = CodeParserPlugin;
113 let entities = plugin.extract_entities(code, "UserService.java");
114 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
115 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
116 eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
117
118 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
119 assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
120 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
121 }
122
123 #[test]
124 fn test_java_nested_methods() {
125 let code = r#"
126public class Calculator {
127 public int add(int a, int b) {
128 return a + b;
129 }
130
131 public int subtract(int a, int b) {
132 return a - b;
133 }
134}
135"#;
136 let plugin = CodeParserPlugin;
137 let entities = plugin.extract_entities(code, "Calculator.java");
138 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
139 eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
140
141 assert!(names.contains(&"Calculator"), "Should find Calculator class");
142 assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
143 assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
144
145 let add = entities.iter().find(|e| e.name == "add").unwrap();
147 assert!(add.parent_id.is_some(), "add should have parent_id");
148 }
149
150 #[test]
151 fn test_c_entity_extraction() {
152 let code = r#"
153#include <stdio.h>
154
155struct Point {
156 int x;
157 int y;
158};
159
160enum Color {
161 RED,
162 GREEN,
163 BLUE
164};
165
166typedef struct {
167 char name[50];
168 int age;
169} Person;
170
171void greet(const char* name) {
172 printf("Hello, %s!\n", name);
173}
174
175int add(int a, int b) {
176 return a + b;
177}
178
179int main() {
180 greet("world");
181 return 0;
182}
183"#;
184 let plugin = CodeParserPlugin;
185 let entities = plugin.extract_entities(code, "main.c");
186 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
187 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
188 eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
189
190 assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
191 assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
192 assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
193 assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
194 assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
195 }
196
197 #[test]
198 fn test_cpp_entity_extraction() {
199 let code = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
200 let plugin = CodeParserPlugin;
201 let entities = plugin.extract_entities(code, "main.cpp");
202 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
203 assert!(names.contains(&"math"), "got: {:?}", names);
204 assert!(names.contains(&"Vector3"), "got: {:?}", names);
205 assert!(names.contains(&"greet"), "got: {:?}", names);
206 }
207
208 #[test]
209 fn test_ruby_entity_extraction() {
210 let code = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
211 let plugin = CodeParserPlugin;
212 let entities = plugin.extract_entities(code, "auth.rb");
213 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
214 assert!(names.contains(&"Auth"), "got: {:?}", names);
215 assert!(names.contains(&"User"), "got: {:?}", names);
216 assert!(names.contains(&"helper"), "got: {:?}", names);
217 }
218
219 #[test]
220 fn test_csharp_entity_extraction() {
221 let code = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
222 let plugin = CodeParserPlugin;
223 let entities = plugin.extract_entities(code, "Models.cs");
224 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
225 assert!(names.contains(&"MyApp"), "got: {:?}", names);
226 assert!(names.contains(&"User"), "got: {:?}", names);
227 assert!(names.contains(&"Role"), "got: {:?}", names);
228 }
229
230 #[test]
231 fn test_swift_entity_extraction() {
232 let code = r#"
233import Foundation
234
235class UserService {
236 var name: String
237
238 init(name: String) {
239 self.name = name
240 }
241
242 func getUsers() -> [User] {
243 return db.findAll()
244 }
245}
246
247struct Point {
248 var x: Double
249 var y: Double
250}
251
252enum Status {
253 case active
254 case inactive
255 case deleted
256}
257
258protocol Repository {
259 associatedtype Item
260 func findById(id: String) -> Item?
261 func findAll() -> [Item]
262}
263
264func helper(x: Int) -> Int {
265 return x * 2
266}
267"#;
268 let plugin = CodeParserPlugin;
269 let entities = plugin.extract_entities(code, "UserService.swift");
270 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
271 eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
272
273 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
274 assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
275 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
276 assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
277 assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
278 }
279
280 #[test]
281 fn test_elixir_entity_extraction() {
282 let code = r#"
283defmodule MyApp.Accounts do
284 def create_user(attrs) do
285 %User{}
286 |> User.changeset(attrs)
287 |> Repo.insert()
288 end
289
290 defp validate(attrs) do
291 # private helper
292 :ok
293 end
294
295 defmacro is_admin(user) do
296 quote do
297 unquote(user).role == :admin
298 end
299 end
300
301 defguard is_positive(x) when is_integer(x) and x > 0
302end
303
304defprotocol Printable do
305 def to_string(data)
306end
307
308defimpl Printable, for: Integer do
309 def to_string(i), do: Integer.to_string(i)
310end
311"#;
312 let plugin = CodeParserPlugin;
313 let entities = plugin.extract_entities(code, "accounts.ex");
314 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
315 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
316 eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
317
318 assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
319 assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
320 assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
321 assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
322 assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
323
324 let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
326 assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
327 }
328
329 #[test]
330 fn test_bash_entity_extraction() {
331 let code = r#"#!/bin/bash
332
333greet() {
334 echo "Hello, $1!"
335}
336
337function deploy {
338 echo "deploying..."
339}
340
341# not a function
342echo "main script"
343"#;
344 let plugin = CodeParserPlugin;
345 let entities = plugin.extract_entities(code, "deploy.sh");
346 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
347 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
348 eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
349
350 assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
351 assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
352 assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
353 }
354
355 #[test]
356 fn test_typescript_entity_extraction() {
357 let code = r#"
359export function hello(): string {
360 return "hello";
361}
362
363export class Greeter {
364 greet(name: string): string {
365 return `Hello, ${name}!`;
366 }
367}
368"#;
369 let plugin = CodeParserPlugin;
370 let entities = plugin.extract_entities(code, "test.ts");
371 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
372 assert!(names.contains(&"hello"), "Should find hello function");
373 assert!(names.contains(&"Greeter"), "Should find Greeter class");
374 }
375
376 #[test]
377 fn test_module_typescript_entity_extraction() {
378 let code = r#"
379export function hello(): string {
380 return "hello";
381}
382"#;
383 let plugin = CodeParserPlugin;
384 let entities = plugin.extract_entities(code, "test.mts");
385 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
386
387 assert!(names.contains(&"hello"), "Should find hello function");
388 }
389
390 #[test]
391 fn test_commonjs_typescript_entity_extraction() {
392 let code = r#"
393export class Greeter {
394 greet(name: string): string {
395 return `Hello, ${name}!`;
396 }
397}
398"#;
399 let plugin = CodeParserPlugin;
400 let entities = plugin.extract_entities(code, "test.cts");
401 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
402
403 assert!(names.contains(&"Greeter"), "Should find Greeter class");
404 assert!(names.contains(&"greet"), "Should find greet method");
405 }
406
407 #[test]
408 fn test_typescript_generator_function_entity_extraction() {
409 let code = r#"
410export async function* streamUsers(): AsyncGenerator<string> {
411 yield "alice";
412}
413"#;
414 let plugin = CodeParserPlugin;
415 let entities = plugin.extract_entities(code, "stream.ts");
416 let stream = entities.iter().find(|e| e.name == "streamUsers");
417
418 assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
419 assert_eq!(stream.unwrap().entity_type, "function");
420 }
421
422 #[test]
423 fn test_javascript_generator_function_entity_extraction() {
424 let code = r#"
425export function* ids() {
426 yield 1;
427 yield 2;
428}
429"#;
430 let plugin = CodeParserPlugin;
431 let entities = plugin.extract_entities(code, "ids.js");
432 let ids = entities.iter().find(|e| e.name == "ids");
433
434 assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
435 assert_eq!(ids.unwrap().entity_type, "function");
436 }
437
438 #[test]
439 fn test_nested_functions_typescript() {
440 let code = r#"
441function outer() {
442 function inner() {
443 return 42;
444 }
445 return inner();
446}
447"#;
448 let plugin = CodeParserPlugin;
449 let entities = plugin.extract_entities(code, "nested.ts");
450 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
451 eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
452
453 assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
454 assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
455
456 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
457 assert!(inner.parent_id.is_some(), "inner should have parent_id");
458 }
459
460 #[test]
461 fn test_nested_functions_python() {
462 let code = "def outer():\n def inner():\n return 42\n return inner()\n";
463 let plugin = CodeParserPlugin;
464 let entities = plugin.extract_entities(code, "nested.py");
465 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
466
467 assert!(names.contains(&"outer"), "got: {:?}", names);
468 assert!(names.contains(&"inner"), "got: {:?}", names);
469
470 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
471 assert!(inner.parent_id.is_some(), "inner should have parent_id");
472 }
473
474 #[test]
475 fn test_nested_functions_rust() {
476 let code = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
477 let plugin = CodeParserPlugin;
478 let entities = plugin.extract_entities(code, "nested.rs");
479 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
480
481 assert!(names.contains(&"outer"), "got: {:?}", names);
482 assert!(names.contains(&"inner"), "got: {:?}", names);
483
484 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
485 assert!(inner.parent_id.is_some(), "inner should have parent_id");
486 }
487
488 #[test]
489 fn test_rust_impl_blocks_unique_names() {
490 let code = r#"
491trait Greeting {
492 fn greet(&self) -> String;
493}
494
495struct Person;
496struct Robot;
497struct Cat;
498
499impl Greeting for Person {
500 fn greet(&self) -> String { "Hello".to_string() }
501}
502
503impl Greeting for Robot {
504 fn greet(&self) -> String { "Beep".to_string() }
505}
506
507impl Greeting for Cat {
508 fn greet(&self) -> String { "Meow".to_string() }
509}
510"#;
511 let plugin = CodeParserPlugin;
512 let entities = plugin.extract_entities(code, "impls.rs");
513 let impl_entities: Vec<&_> = entities.iter()
514 .filter(|e| e.entity_type == "impl")
515 .collect();
516 let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
517
518 assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
519 assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
520 assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
521 assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
522 }
523
524 #[test]
525 fn test_nested_functions_go() {
526 let code = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
528 let plugin = CodeParserPlugin;
529 let entities = plugin.extract_entities(code, "nested.go");
530 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
531
532 assert!(names.contains(&"outer"), "got: {:?}", names);
533 }
534
535 #[test]
536 fn test_renamed_function_same_structural_hash() {
537 let code_a = "def get_card():\n return db.query('cards')\n";
538 let code_b = "def get_card_1():\n return db.query('cards')\n";
539
540 let plugin = CodeParserPlugin;
541 let entities_a = plugin.extract_entities(code_a, "a.py");
542 let entities_b = plugin.extract_entities(code_b, "b.py");
543
544 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
545 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
546 assert_eq!(entities_a[0].name, "get_card");
547 assert_eq!(entities_b[0].name, "get_card_1");
548
549 assert_eq!(
551 entities_a[0].structural_hash, entities_b[0].structural_hash,
552 "Renamed function with identical body should have same structural_hash"
553 );
554
555 assert_ne!(
557 entities_a[0].content_hash, entities_b[0].content_hash,
558 "Content hash should differ since raw content includes the name"
559 );
560 }
561
562 #[test]
563 fn test_hcl_entity_extraction() {
564 let code = r#"
565region = "eu-west-1"
566
567variable "image_id" {
568 type = string
569}
570
571resource "aws_instance" "web" {
572 ami = var.image_id
573
574 lifecycle {
575 create_before_destroy = true
576 }
577}
578"#;
579 let plugin = CodeParserPlugin;
580 let entities = plugin.extract_entities(code, "main.tf");
581 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
582 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
583 eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
584
585 assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
586 assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
587 assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
588 assert!(
589 names.contains(&"resource.aws_instance.web.lifecycle"),
590 "Should find nested lifecycle block with qualified name, got: {:?}",
591 names
592 );
593 assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
594 assert!(
595 !names.contains(&"create_before_destroy"),
596 "Should skip nested attributes inside nested blocks, got: {:?}",
597 names
598 );
599
600 let lifecycle = entities
601 .iter()
602 .find(|e| e.name == "resource.aws_instance.web.lifecycle")
603 .unwrap();
604 assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
605 assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
606 }
607
608 #[test]
609 fn test_kotlin_entity_extraction() {
610 let code = r#"
611class UserService {
612 val name: String = ""
613
614 fun greet(): String {
615 return "Hello, $name"
616 }
617
618 companion object {
619 fun create(): UserService = UserService()
620 }
621}
622
623interface Repository {
624 fun findById(id: Int): Any?
625}
626
627object AppConfig {
628 val version = "1.0"
629}
630
631fun topLevel(x: Int): Int = x * 2
632"#;
633 let plugin = CodeParserPlugin;
634 let entities = plugin.extract_entities(code, "App.kt");
635 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
636 eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
637 assert!(names.contains(&"UserService"), "got: {:?}", names);
638 assert!(names.contains(&"greet"), "got: {:?}", names);
639 assert!(names.contains(&"Repository"), "got: {:?}", names);
640 assert!(names.contains(&"findById"), "got: {:?}", names);
641 assert!(names.contains(&"AppConfig"), "got: {:?}", names);
642 assert!(names.contains(&"topLevel"), "got: {:?}", names);
643 }
644
645 #[test]
646 fn test_xml_entity_extraction() {
647 let code = r#"<?xml version="1.0" encoding="UTF-8"?>
648<project>
649 <groupId>com.example</groupId>
650 <artifactId>my-app</artifactId>
651 <dependencies>
652 <dependency>
653 <groupId>junit</groupId>
654 <artifactId>junit</artifactId>
655 </dependency>
656 </dependencies>
657 <build>
658 <plugins>
659 <plugin>
660 <groupId>org.apache.maven</groupId>
661 </plugin>
662 </plugins>
663 </build>
664</project>
665"#;
666 let plugin = CodeParserPlugin;
667 let entities = plugin.extract_entities(code, "pom.xml");
668 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
669 eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
670 assert!(names.contains(&"project"), "got: {:?}", names);
671 assert!(names.contains(&"dependencies"), "got: {:?}", names);
672 assert!(names.contains(&"build"), "got: {:?}", names);
673 }
674
675 #[test]
676 fn test_arrow_callback_scope_boundary_typescript() {
677 let code = r#"
681const activeQueues = [
682 { queue: queues.fooQueue, processor: foo.process },
683];
684
685activeQueues.forEach((handler: any) => {
686 const queue = handler.queue;
687 let retries = 0;
688
689 class QueueHandler {
690 handle() { return queue; }
691 }
692
693 function createHandler() {
694 return new QueueHandler();
695 }
696
697 queue.process((job) => {
698 const orderId = job.data.orderId;
699 return orderId;
700 });
701});
702
703function handleFailure(job: any, err: any) {
704 console.error('failed', err);
705}
706"#;
707 let plugin = CodeParserPlugin;
708 let entities = plugin.extract_entities(code, "process.ts");
709 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
710 let top_level: Vec<&str> = entities
711 .iter()
712 .filter(|e| e.parent_id.is_none())
713 .map(|e| e.name.as_str())
714 .collect();
715
716 assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
718 assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
719
720 assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
722 assert!(names.contains(&"handle"), "got: {:?}", names);
723 assert!(names.contains(&"createHandler"), "got: {:?}", names);
724
725 assert!(!names.contains(&"queue"), "got: {:?}", names);
727 assert!(!names.contains(&"retries"), "got: {:?}", names);
728 assert!(!names.contains(&"orderId"), "got: {:?}", names);
729 }
730
731 #[test]
732 fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
733 let code = r#"
734function factory() {
735 class Foo {
736 method(): number {
737 return 1;
738 }
739 }
740
741 function bar(): Foo {
742 return new Foo();
743 }
744}
745
746factory();
747"#;
748 let plugin = CodeParserPlugin;
749 let entities = plugin.extract_entities(code, "wrapped.ts");
750 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
751 assert!(
752 names.contains(&"factory"),
753 "Should find top-level wrapper function, got: {:?}",
754 names
755 );
756 assert!(
757 names.contains(&"Foo"),
758 "Should find class inside top-level wrapper, got: {:?}",
759 names
760 );
761 assert!(
762 names.contains(&"bar"),
763 "Should find function inside top-level wrapper, got: {:?}",
764 names
765 );
766 }
767
768 #[test]
769 fn test_top_level_iife_still_extracts_typescript_entities() {
770 let code = r#"
771(() => {
772 class Foo {
773 method(): number {
774 return 1;
775 }
776 }
777
778 function bar(): Foo {
779 return new Foo();
780 }
781})();
782"#;
783 let plugin = CodeParserPlugin;
784 let entities = plugin.extract_entities(code, "iife.ts");
785 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
786 assert!(
787 names.contains(&"Foo"),
788 "Should find class inside top-level IIFE, got: {:?}",
789 names
790 );
791 assert!(
792 names.contains(&"bar"),
793 "Should find function inside top-level IIFE, got: {:?}",
794 names
795 );
796 }
797
798 #[test]
799 fn test_function_locals_not_extracted_as_nested_entities_typescript() {
800 let code = r#"
801export default function foo() {
802 const x = 1;
803 return x;
804}
805"#;
806 let plugin = CodeParserPlugin;
807 let entities = plugin.extract_entities(code, "default-export.ts");
808 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
809 assert!(
810 names.contains(&"foo"),
811 "Should find exported function, got: {:?}",
812 names
813 );
814 assert!(
815 !names.contains(&"x"),
816 "Local inside function should not be extracted as an entity, got: {:?}",
817 names
818 );
819 }
820
821 #[test]
822 fn test_function_expression_scope_boundary_typescript() {
823 let code = r#"
826const foo = function namedExpr(x: number) {
827 const inner = x + 1;
828 return inner;
829};
830
831const bar = function(y: number) {
832 const local = y * 2;
833 return local;
834};
835
836const items = [1, 2, 3];
837
838items.forEach(function process(item) {
839 const doubled = item * 2;
840 console.log(doubled);
841});
842"#;
843 let plugin = CodeParserPlugin;
844 let entities = plugin.extract_entities(code, "funexpr.ts");
845 let top_level: Vec<&str> = entities
846 .iter()
847 .filter(|e| e.parent_id.is_none())
848 .map(|e| e.name.as_str())
849 .collect();
850 let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
851 let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
852
853 assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
856 assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
857 assert!(top_level.contains(&"items"), "got: {:?}", top_level);
858 assert_eq!(find("foo").entity_type, "function");
859 assert_eq!(find("bar").entity_type, "function");
860 assert_eq!(find("items").entity_type, "variable");
861
862 assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
864 assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
865 assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
866
867 assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
869 }
870
871 #[test]
872 fn test_variable_assigned_arrow_extracts_inner_entities() {
873 let code = r#"
876const handler = () => {
877 class Inner {
878 run() { return 1; }
879 }
880
881 function make() {
882 return new Inner();
883 }
884
885 const local = 42;
886};
887"#;
888 let plugin = CodeParserPlugin;
889 let entities = plugin.extract_entities(code, "assigned.ts");
890 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
891 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
892
893 assert_eq!(handler.entity_type, "function");
894 assert!(names.contains(&"handler"), "got: {:?}", names);
895 assert!(names.contains(&"Inner"), "got: {:?}", names);
896 assert!(names.contains(&"run"), "got: {:?}", names);
897 assert!(names.contains(&"make"), "got: {:?}", names);
898 assert!(!names.contains(&"local"), "got: {:?}", names);
899 }
900
901 #[test]
902 fn test_variable_assigned_function_expression_extracts_inner_entities() {
903 let code = r#"
905const handler = function() {
906 class Inner {}
907 function make() { return new Inner(); }
908 const local = 42;
909};
910"#;
911 let plugin = CodeParserPlugin;
912 let entities = plugin.extract_entities(code, "funexpr-inner.ts");
913 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
914 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
915
916 assert_eq!(handler.entity_type, "function");
917 assert!(names.contains(&"handler"), "got: {:?}", names);
918 assert!(names.contains(&"Inner"), "got: {:?}", names);
919 assert!(names.contains(&"make"), "got: {:?}", names);
920 assert!(!names.contains(&"local"), "got: {:?}", names);
921 }
922
923 #[test]
924 fn test_let_assigned_arrow_stays_variable_typescript() {
925 let code = r#"
926let handler = () => {
927 return 42;
928};
929"#;
930 let plugin = CodeParserPlugin;
931 let entities = plugin.extract_entities(code, "let-assigned.ts");
932 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
933
934 assert_eq!(handler.entity_type, "variable");
935 }
936
937 #[test]
938 fn test_const_assigned_arrow_promoted_to_function_javascript() {
939 let code = r#"
940const handler = () => {
941 return 42;
942};
943"#;
944 let plugin = CodeParserPlugin;
945 let entities = plugin.extract_entities(code, "handler.js");
946 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
947
948 assert_eq!(handler.entity_type, "function");
949 }
950
951 #[test]
952 fn test_go_var_declaration() {
953 let code = r#"package featuremgmt
954
955type FeatureFlag struct {
956 Name string
957 Description string
958 Stage string
959}
960
961var standardFeatureFlags = []FeatureFlag{
962 {
963 Name: "panelTitleSearch",
964 Description: "Search for dashboards using panel title",
965 Stage: "PublicPreview",
966 },
967}
968
969func GetFlags() []FeatureFlag {
970 return standardFeatureFlags
971}
972"#;
973 let plugin = CodeParserPlugin;
974 let entities = plugin.extract_entities(code, "flags.go");
975 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
976 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
977 eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
978
979 assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
980 assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
981 assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
982 }
983
984 #[test]
985 fn test_go_grouped_var_declaration() {
986 let code = r#"package test
987
988var (
989 simple = 42
990 flags = []string{"a", "b"}
991)
992
993const (
994 x = 1
995 y = 2
996)
997
998func main() {}
999"#;
1000 let plugin = CodeParserPlugin;
1001 let entities = plugin.extract_entities(code, "test.go");
1002 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1003 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1004 eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1005
1006 assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1007 assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1008 assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1009 }
1010
1011 #[test]
1012 fn test_dart_entity_extraction() {
1013 let code = r#"
1014import 'dart:math';
1015
1016class Calculator {
1017 final String name;
1018
1019 Calculator(this.name);
1020
1021 Calculator.withDefault() : name = 'default';
1022
1023 factory Calculator.create(String name) {
1024 return Calculator(name);
1025 }
1026
1027 int add(int a, int b) {
1028 return a + b;
1029 }
1030
1031 int get doubleAdd => add(1, 1) * 2;
1032
1033 set label(String value) {
1034 // no-op
1035 }
1036
1037 int operator +(Calculator other) {
1038 return 0;
1039 }
1040}
1041
1042mixin Loggable {
1043 void log(String message) {
1044 print(message);
1045 }
1046}
1047
1048extension StringExt on String {
1049 bool get isBlank => trim().isEmpty;
1050}
1051
1052enum Status {
1053 active,
1054 inactive;
1055
1056 String display() => name.toUpperCase();
1057}
1058
1059typedef Callback = void Function(int);
1060
1061int add(int a, int b) {
1062 return a + b;
1063}
1064
1065extension type Wrapper(int value) implements int {}
1066"#;
1067 let plugin = CodeParserPlugin;
1068 let entities = plugin.extract_entities(code, "calculator.dart");
1069 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1070 eprintln!(
1071 "Dart entities: {:?}",
1072 entities
1073 .iter()
1074 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1075 .collect::<Vec<_>>()
1076 );
1077
1078 assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
1080 assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
1081 assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
1082 assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
1083 assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
1084 assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
1085 assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
1086
1087 let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
1089 assert!(add_method.is_some(), "Should find add method inside Calculator");
1090 assert_eq!(add_method.unwrap().entity_type, "method");
1091
1092 let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
1094 assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
1095 let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
1096 assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
1097 assert_eq!(named_ctor.unwrap().entity_type, "constructor");
1098 assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
1099
1100 let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
1102 assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
1103 assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
1104
1105 let getter = entities.iter().find(|e| e.name == "doubleAdd");
1107 assert!(getter.is_some(), "Should find getter doubleAdd");
1108 assert_eq!(getter.unwrap().entity_type, "getter");
1109
1110 let setter = entities.iter().find(|e| e.name == "label");
1111 assert!(setter.is_some(), "Should find setter label");
1112 assert_eq!(setter.unwrap().entity_type, "setter");
1113
1114 let operator = entities.iter().find(|e| e.name == "operator +");
1115 assert!(operator.is_some(), "Should find operator +");
1116 assert_eq!(operator.unwrap().entity_type, "method");
1117
1118 let log_method = entities.iter().find(|e| e.name == "log");
1120 assert!(log_method.is_some(), "Should find log in Loggable");
1121 assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
1122
1123 let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
1125 assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
1126
1127 let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
1128 assert_eq!(loggable.entity_type, "mixin");
1129
1130 let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
1131 assert_eq!(ext.entity_type, "extension");
1132
1133 let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
1134 assert_eq!(wrapper.entity_type, "extension");
1135 }
1136
1137 #[test]
1138 fn test_dart_top_level_function_includes_body() {
1139 let code = r#"
1140int add(int a, int b) {
1141 return a + b;
1142}
1143
1144String greet(String name) => 'Hello, $name!';
1145"#;
1146 let plugin = CodeParserPlugin;
1147 let entities = plugin.extract_entities(code, "funcs.dart");
1148 eprintln!(
1149 "Dart top-level: {:?}",
1150 entities
1151 .iter()
1152 .map(|e| (&e.name, &e.entity_type, &e.content))
1153 .collect::<Vec<_>>()
1154 );
1155
1156 let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
1157 assert!(
1158 add_fn.content.contains("return a + b"),
1159 "Top-level function content should include the body, got: {:?}",
1160 add_fn.content
1161 );
1162
1163 let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
1164 assert!(
1165 greet_fn.content.contains("Hello"),
1166 "Expression body should be included, got: {:?}",
1167 greet_fn.content
1168 );
1169
1170 let code_v2 = r#"
1172int add(int a, int b) {
1173 return a * b;
1174}
1175
1176String greet(String name) => 'Hello, $name!';
1177"#;
1178 let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
1179 let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
1180 assert_ne!(
1181 add_fn.content_hash, add_v2.content_hash,
1182 "Body change should produce different content_hash"
1183 );
1184
1185 let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
1187 assert_eq!(
1188 greet_fn.content_hash, greet_v2.content_hash,
1189 "Unchanged function should keep the same content_hash"
1190 );
1191 }
1192
1193 #[test]
1194 fn test_dart_renamed_named_constructor_same_structural_hash() {
1195 let code_a = r#"
1196class Foo {
1197 Foo.fromJson(Map<String, dynamic> json) {
1198 print(json);
1199 }
1200}
1201"#;
1202 let code_b = r#"
1203class Foo {
1204 Foo.fromMap(Map<String, dynamic> json) {
1205 print(json);
1206 }
1207}
1208"#;
1209 let plugin = CodeParserPlugin;
1210 let entities_a = plugin.extract_entities(code_a, "a.dart");
1211 let entities_b = plugin.extract_entities(code_b, "b.dart");
1212
1213 let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
1214 let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
1215
1216 assert_eq!(
1217 ctor_a.structural_hash, ctor_b.structural_hash,
1218 "Renamed named constructor with identical body should have same structural_hash"
1219 );
1220 assert_ne!(
1221 ctor_a.content_hash, ctor_b.content_hash,
1222 "Content hash should differ since raw content includes the name"
1223 );
1224 }
1225
1226 #[test]
1227 fn test_dart_top_level_getter_setter() {
1228 let code = r#"
1229int _value = 0;
1230
1231int get currentValue {
1232 return _value;
1233}
1234
1235set currentValue(int v) {
1236 _value = v;
1237}
1238"#;
1239 let plugin = CodeParserPlugin;
1240 let entities = plugin.extract_entities(code, "accessors.dart");
1241 eprintln!(
1242 "Dart top-level accessors: {:?}",
1243 entities
1244 .iter()
1245 .map(|e| (&e.name, &e.entity_type, &e.content))
1246 .collect::<Vec<_>>()
1247 );
1248
1249 let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
1250 assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
1251 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1252 assert!(
1253 getter.unwrap().content.contains("return _value"),
1254 "Top-level getter content should include the body"
1255 );
1256 assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
1257
1258 let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
1262 assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
1263 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1264 assert!(
1265 setter.unwrap().content.contains("_value = v"),
1266 "Top-level setter content should include the body"
1267 );
1268 }
1269
1270 #[test]
1271 fn test_dart_field_entity_type() {
1272 let code = r#"
1273class Config {
1274 final String name;
1275 static const int maxRetries = 3;
1276}
1277"#;
1278 let plugin = CodeParserPlugin;
1279 let entities = plugin.extract_entities(code, "config.dart");
1280 eprintln!(
1281 "Dart fields: {:?}",
1282 entities
1283 .iter()
1284 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1285 .collect::<Vec<_>>()
1286 );
1287
1288 let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
1289 assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
1290 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1291 assert_eq!(name_field.unwrap().entity_type, "field");
1292
1293 let max_retries = entities.iter().find(|e| e.name == "maxRetries");
1294 assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
1295 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1296 assert_eq!(max_retries.unwrap().entity_type, "field");
1297 }
1298
1299 #[test]
1300 fn test_dart_identifier_list_fields() {
1301 let code = r#"
1305abstract class Shape {
1306 abstract double x, y;
1307 abstract String label;
1308}
1309"#;
1310 let plugin = CodeParserPlugin;
1311 let entities = plugin.extract_entities(code, "shape.dart");
1312 eprintln!(
1313 "Dart identifier_list fields: {:?}",
1314 entities
1315 .iter()
1316 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1317 .collect::<Vec<_>>()
1318 );
1319
1320 let x_field = entities.iter().find(|e| e.name == "x");
1321 assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
1322 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1323 assert_eq!(x_field.unwrap().entity_type, "field");
1324 assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
1325
1326 let label_field = entities.iter().find(|e| e.name == "label");
1327 assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
1328 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1329 assert_eq!(label_field.unwrap().entity_type, "field");
1330 }
1331
1332 #[test]
1333 fn test_ocaml_entity_extraction() {
1334 let code = r#"
1335type color = Red | Green | Blue
1336
1337type point = {
1338 x : float;
1339 y : float;
1340}
1341
1342exception Not_found of string
1343
1344let greet name =
1345 Printf.printf "Hello, %s!\n" name
1346
1347let add a b = a + b
1348
1349let version = "1.0"
1350
1351let color_to_string = function
1352 | Red -> "red"
1353 | Blue -> "blue"
1354
1355let inc = fun x -> x + 1
1356
1357module MyModule = struct
1358 let helper x = x * 2
1359end
1360
1361module type Printable = sig
1362 val to_string : 'a -> string
1363end
1364
1365external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
1366
1367class point_class x_init = object
1368 val mutable x = x_init
1369 method get_x = x
1370end
1371
1372class type measurable = object
1373 method measure : float
1374end
1375"#;
1376 let plugin = CodeParserPlugin;
1377 let entities = plugin.extract_entities(code, "example.ml");
1378 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1379 eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1380
1381 let find = |name: &str| entities.iter().find(|e| e.name == name)
1382 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1383
1384 assert_eq!(find("color").entity_type, "type");
1385 assert_eq!(find("point").entity_type, "type");
1386 assert_eq!(find("Not_found").entity_type, "exception");
1387 assert_eq!(find("greet").entity_type, "function");
1388 assert_eq!(find("add").entity_type, "function");
1389 assert_eq!(find("version").entity_type, "value");
1390 assert_eq!(find("color_to_string").entity_type, "function");
1391 assert_eq!(find("inc").entity_type, "function");
1392 assert_eq!(find("MyModule").entity_type, "module");
1393 assert_eq!(find("Printable").entity_type, "module_type");
1394 assert_eq!(find("caml_input").entity_type, "external");
1395 assert_eq!(find("point_class").entity_type, "class");
1396 assert_eq!(find("measurable").entity_type, "class_type");
1397 }
1398
1399 #[test]
1400 fn test_ocaml_nested_module_entities() {
1401 let code = r#"
1402module Outer = struct
1403 let x = 42
1404
1405 module Inner = struct
1406 let y = 0
1407 end
1408end
1409"#;
1410 let plugin = CodeParserPlugin;
1411 let entities = plugin.extract_entities(code, "nested.ml");
1412 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1413 eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1414
1415 let find = |name: &str| entities.iter().find(|e| e.name == name)
1416 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1417
1418 let outer = find("Outer");
1419 let x = find("x");
1420 let inner = find("Inner");
1421 let y = find("y");
1422
1423 assert_eq!(outer.entity_type, "module");
1424 assert_eq!(x.entity_type, "value");
1425 assert_eq!(inner.entity_type, "module");
1426 assert_eq!(y.entity_type, "value");
1427
1428 assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
1429 assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
1430 assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
1431 }
1432
1433 #[test]
1434 fn test_ocaml_interface_entity_extraction() {
1435 let code = r#"
1436type t
1437
1438val create : string -> t
1439val to_string : t -> string
1440
1441exception Invalid_input of string
1442
1443module type Serializable = sig
1444 val serialize : t -> string
1445end
1446"#;
1447 let plugin = CodeParserPlugin;
1448 let entities = plugin.extract_entities(code, "example.mli");
1449 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1450 eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1451
1452 let find = |name: &str| entities.iter().find(|e| e.name == name)
1453 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1454
1455 assert_eq!(find("t").entity_type, "type");
1456 assert_eq!(find("create").entity_type, "val");
1457 assert_eq!(find("to_string").entity_type, "val");
1458 assert_eq!(find("Invalid_input").entity_type, "exception");
1459 assert_eq!(find("Serializable").entity_type, "module_type");
1460 }
1461
1462 #[test]
1463 fn test_ocaml_mutual_recursion_let() {
1464 let code = r#"
1465let rec even n = (n = 0) || odd (n - 1)
1466and odd n = (n <> 0) && even (n - 1)
1467
1468let rec ping x = pong (x - 1)
1469and pong x = if x <= 0 then 0 else ping (x - 1)
1470"#;
1471 let plugin = CodeParserPlugin;
1472 let entities = plugin.extract_entities(code, "mutual.ml");
1473 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1474 eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1475
1476 let find = |name: &str| entities.iter().find(|e| e.name == name)
1477 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1478
1479 assert_eq!(find("even").entity_type, "function");
1480 assert_eq!(find("odd").entity_type, "function");
1481 assert_eq!(find("ping").entity_type, "function");
1482 assert_eq!(find("pong").entity_type, "function");
1483 }
1484
1485 #[test]
1486 fn test_ocaml_mutual_recursion_module() {
1487 let code = r#"
1488module rec A : sig val x : int end = struct
1489 let x = B.y + 1
1490end
1491and B : sig val y : int end = struct
1492 let y = 0
1493end
1494"#;
1495 let plugin = CodeParserPlugin;
1496 let entities = plugin.extract_entities(code, "mutual_mod.ml");
1497 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1498 eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1499
1500 let find = |name: &str| entities.iter().find(|e| e.name == name)
1501 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1502
1503 let a = find("A");
1504 let b = find("B");
1505 assert_eq!(a.entity_type, "module");
1506 assert_eq!(b.entity_type, "module");
1507
1508 let x = find("x");
1509 let y = find("y");
1510 assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
1511 assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
1512 }
1513
1514 #[test]
1515 fn test_ocaml_destructured_let() {
1516 let code = r#"
1517let (a, b) = (1, 2)
1518
1519let { x; y } = point
1520
1521let simple = 42
1522"#;
1523 let plugin = CodeParserPlugin;
1524 let entities = plugin.extract_entities(code, "destruct.ml");
1525 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1526 eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1527
1528 let find = |name: &str| entities.iter().find(|e| e.name == name)
1529 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1530
1531 assert_eq!(find("a").entity_type, "value");
1532 assert_eq!(find("b").entity_type, "value");
1533 assert_eq!(find("x").entity_type, "value");
1534 assert_eq!(find("y").entity_type, "value");
1535 assert_eq!(find("simple").entity_type, "value");
1536 }
1537
1538 #[test]
1539 fn test_ocaml_mutual_recursion_class() {
1540 let code = r#"
1541class foo = object
1542 method x = 1
1543end
1544and bar = object
1545 method y = 2
1546end
1547"#;
1548 let plugin = CodeParserPlugin;
1549 let entities = plugin.extract_entities(code, "classes.ml");
1550 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1551 eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1552
1553 let find = |name: &str| entities.iter().find(|e| e.name == name)
1554 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1555
1556 assert_eq!(find("foo").entity_type, "class");
1557 assert_eq!(find("bar").entity_type, "class");
1558 }
1559
1560 #[test]
1561 fn test_perl_entity_extraction() {
1562 let code = r#"package Foo::Bar;
1563
1564use strict;
1565use warnings;
1566
1567sub hello {
1568 my ($self, $name) = @_;
1569 print "Hello, $name!\n";
1570}
1571
1572sub _private_helper {
1573 return 42;
1574}
1575
15761;
1577"#;
1578 let plugin = CodeParserPlugin;
1579 let entities = plugin.extract_entities(code, "Foo/Bar.pm");
1580 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1581
1582 assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
1583 assert!(names.contains(&"hello"), "got: {:?}", names);
1584 assert!(names.contains(&"_private_helper"), "got: {:?}", names);
1585
1586 let find = |name: &str| entities.iter().find(|e| e.name == name)
1587 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1588
1589 assert_eq!(find("Foo::Bar").entity_type, "package");
1590 assert_eq!(find("hello").entity_type, "function");
1591 assert_eq!(find("_private_helper").entity_type, "function");
1592 }
1593
1594 #[test]
1595 fn test_fortran_entity_extraction() {
1596 let code = r#"module math_utils
1597 implicit none
1598contains
1599 function add(a, b) result(c)
1600 integer, intent(in) :: a, b
1601 integer :: c
1602 c = a + b
1603 end function add
1604
1605 subroutine greet()
1606 print *, "hello"
1607 end subroutine greet
1608end module math_utils
1609
1610program main
1611 implicit none
1612 print *, "hello"
1613end program main
1614"#;
1615 let plugin = CodeParserPlugin;
1616 let entities = plugin.extract_entities(code, "test.f90");
1617 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1618
1619 assert!(names.contains(&"math_utils"), "got: {:?}", names);
1620 assert!(names.contains(&"add"), "got: {:?}", names);
1621 assert!(names.contains(&"greet"), "got: {:?}", names);
1622 assert!(names.contains(&"main"), "got: {:?}", names);
1623
1624 let find = |name: &str| entities.iter().find(|e| e.name == name)
1625 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1626
1627 assert_eq!(find("math_utils").entity_type, "module");
1628 assert_eq!(find("add").entity_type, "function");
1629 assert_eq!(find("greet").entity_type, "subroutine");
1630 assert_eq!(find("main").entity_type, "program");
1631
1632 assert!(find("add").parent_id.is_some());
1634 assert!(find("greet").parent_id.is_some());
1635 }
1636
1637 #[test]
1638 fn test_scala_entity_extraction() {
1639 let code = r#"
1640package com.example
1641
1642import scala.collection.mutable
1643
1644class UserService(val name: String) {
1645 def getUsers(): List[User] = db.findAll()
1646
1647 def createUser(user: User): Unit = db.save(user)
1648
1649 private def validate(user: User): Boolean = true
1650}
1651
1652object UserService {
1653 def apply(name: String): UserService = new UserService(name)
1654
1655 val DefaultName: String = "default"
1656}
1657
1658trait Repository[T] {
1659 def findById(id: String): Option[T]
1660 def findAll(): List[T]
1661}
1662
1663case class User(id: String, name: String)
1664
1665type UserId = String
1666"#;
1667 let plugin = CodeParserPlugin;
1668 let entities = plugin.extract_entities(code, "UserService.scala");
1669 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1670 eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1671
1672 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
1673 assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
1674 assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
1675 assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
1676
1677 let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
1679 assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
1680 }
1681
1682 #[test]
1683 fn test_scala3_entity_extraction() {
1684 let code = r#"
1685package com.example
1686
1687enum Color:
1688 case Red, Green, Blue
1689
1690enum Planet(mass: Double, radius: Double):
1691 case Mercury extends Planet(3.303e+23, 2.4397e6)
1692 case Venus extends Planet(4.869e+24, 6.0518e6)
1693
1694object Main:
1695 def main(args: Array[String]): Unit =
1696 println("Hello, World!")
1697
1698trait Greeter:
1699 def greet(name: String): String
1700
1701given Greeter with
1702 def greet(name: String): String = s"Hello, $name!"
1703
1704extension (s: String)
1705 def shout: String = s.toUpperCase + "!"
1706
1707type Predicate[A] = A => Boolean
1708"#;
1709 let plugin = CodeParserPlugin;
1710 let entities = plugin.extract_entities(code, "Main.scala");
1711 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1712 eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1713
1714 assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
1715 assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
1716 assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
1717 assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
1718 assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
1719 }
1720
1721 #[test]
1722 fn test_zig_entity_extraction() {
1723 let code = r#"
1724const std = @import("std");
1725
1726pub const Point = struct {
1727 x: i32,
1728 y: i32,
1729};
1730
1731pub const Color = enum {
1732 red,
1733 green,
1734 blue,
1735};
1736
1737const Person = struct {
1738 name: []const u8,
1739 age: u32,
1740};
1741
1742pub fn greet(name: []const u8) void {
1743 std.debug.print("Hello, {s}!\n", .{name});
1744}
1745
1746fn add(a: i32, b: i32) i32 {
1747 return a + b;
1748}
1749
1750pub fn main() !void {
1751 greet("world");
1752}
1753
1754test "basic addition" {
1755 const result = add(2, 3);
1756 _ = result;
1757}
1758"#;
1759 let plugin = CodeParserPlugin;
1760 let entities = plugin.extract_entities(code, "main.zig");
1761 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1762 let types: std::collections::HashMap<&str, &str> = entities
1763 .iter()
1764 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
1765 .collect();
1766
1767 assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
1768 assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
1769 assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
1770 assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
1771 assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
1772 assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
1773
1774 assert_eq!(types["greet"], "function");
1775 assert_eq!(types["add"], "function");
1776 assert_eq!(types["Point"], "struct");
1777 assert_eq!(types["Color"], "enum");
1778 assert_eq!(types["Person"], "struct");
1779 }
1780}