1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use languages::{get_all_code_extensions, get_language_config};
10use entity_extractor::extract_entities;
11
12pub struct CodeParserPlugin;
13
14thread_local! {
17 static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
18}
19
20impl SemanticParserPlugin for CodeParserPlugin {
21 fn id(&self) -> &str {
22 "code"
23 }
24
25 fn extensions(&self) -> &[&str] {
26 get_all_code_extensions()
27 }
28
29 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
30 self.extract_entities_with_tree(content, file_path).0
31 }
32
33 fn extract_entities_with_tree(
34 &self,
35 content: &str,
36 file_path: &str,
37 ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
38 let ext = std::path::Path::new(file_path)
39 .extension()
40 .and_then(|e| e.to_str())
41 .map(|e| format!(".{}", e.to_lowercase()))
42 .unwrap_or_default();
43
44 let config = match get_language_config(&ext) {
45 Some(c) => c,
46 None => {
47 match detect_ext_from_content(content)
49 .and_then(|se| get_language_config(&se))
50 {
51 Some(c) => c,
52 None => return (Vec::new(), None),
53 }
54 }
55 };
56
57 let language = match (config.get_language)() {
58 Some(lang) => lang,
59 None => return (Vec::new(), None),
60 };
61
62 PARSER_CACHE.with(|cache| {
63 let mut cache = cache.borrow_mut();
64 let parser = cache.entry(config.id).or_insert_with(|| {
65 let mut p = tree_sitter::Parser::new();
66 let _ = p.set_language(&language);
67 p
68 });
69
70 let tree = match parser.parse(content.as_bytes(), None) {
71 Some(t) => t,
72 None => return (Vec::new(), None),
73 };
74
75 let entities = extract_entities(&tree, file_path, config, content);
76 (entities, Some(tree))
77 })
78 }
79}
80
81use crate::parser::registry::detect_ext_from_content;
82
83#[cfg(test)]
84mod tests {
85 use super::*;
86
/// Java: a class, an interface and an enum declared at file level are all reported.
#[test]
fn test_java_entity_extraction() {
    let source = r#"
package com.example;

import java.util.List;

public class UserService {
 private String name;

 public UserService(String name) {
 this.name = name;
 }

 public List<User> getUsers() {
 return db.findAll();
 }

 public void createUser(User user) {
 db.save(user);
 }
}

interface Repository<T> {
 T findById(String id);
 List<T> findAll();
}

enum Status {
 ACTIVE,
 INACTIVE,
 DELETED
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "UserService.java");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("Java entities: {:?}", pairs);

    let expected = [
        ("UserService", "class UserService"),
        ("Repository", "interface Repository"),
        ("Status", "enum Status"),
    ];
    for &(name, what) in &expected {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }
}
131
/// Java: methods are reported alongside their class and carry a parent id.
#[test]
fn test_java_nested_methods() {
    let source = r#"
public class Calculator {
 public int add(int a, int b) {
 return a + b;
 }

 public int subtract(int a, int b) {
 return a - b;
 }
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "Calculator.java");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let summary: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
        .collect();
    eprintln!("Java nested: {:?}", summary);

    assert!(found.contains(&"Calculator"), "Should find Calculator class");
    for &method in &["add", "subtract"] {
        assert!(found.contains(&method), "Should find {} method, got: {:?}", method, found);
    }

    let add_has_parent = extracted
        .iter()
        .find(|e| e.name == "add")
        .unwrap()
        .parent_id
        .is_some();
    assert!(add_has_parent, "add should have parent_id");
}
158
/// C: functions, a named struct and a named enum are reported.
#[test]
fn test_c_entity_extraction() {
    let source = r#"
#include <stdio.h>

struct Point {
 int x;
 int y;
};

enum Color {
 RED,
 GREEN,
 BLUE
};

typedef struct {
 char name[50];
 int age;
} Person;

void greet(const char* name) {
 printf("Hello, %s!\n", name);
}

int add(int a, int b) {
 return a + b;
}

int main() {
 greet("world");
 return 0;
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "main.c");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("C entities: {:?}", pairs);

    let expected = [
        ("greet", "function"),
        ("add", "function"),
        ("main", "function"),
        ("Point", "struct"),
        ("Color", "enum"),
    ];
    for &(name, kind) in &expected {
        assert!(
            found.contains(&name),
            "Should find {} {}, got: {:?}",
            name,
            kind,
            found
        );
    }
}
205
/// C++: a namespace, a class inside it and a free function are reported.
#[test]
fn test_cpp_entity_extraction() {
    let source = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
    let extracted = CodeParserPlugin.extract_entities(source, "main.cpp");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    for expected in &["math", "Vector3", "greet"] {
        assert!(found.contains(expected), "got: {:?}", found);
    }
}
216
/// Ruby: a module, a class inside it and a top-level method are reported.
#[test]
fn test_ruby_entity_extraction() {
    let source = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
    let extracted = CodeParserPlugin.extract_entities(source, "auth.rb");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    for expected in &["Auth", "User", "helper"] {
        assert!(found.contains(expected), "got: {:?}", found);
    }
}
227
/// C#: a namespace plus the class and enum declared inside it are reported.
#[test]
fn test_csharp_entity_extraction() {
    let source = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
    let extracted = CodeParserPlugin.extract_entities(source, "Models.cs");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    for expected in &["MyApp", "User", "Role"] {
        assert!(found.contains(expected), "got: {:?}", found);
    }
}
238
/// Swift: class, struct, enum, protocol and free function are all reported.
#[test]
fn test_swift_entity_extraction() {
    let source = r#"
import Foundation

class UserService {
 var name: String

 init(name: String) {
 self.name = name
 }

 func getUsers() -> [User] {
 return db.findAll()
 }
}

struct Point {
 var x: Double
 var y: Double
}

enum Status {
 case active
 case inactive
 case deleted
}

protocol Repository {
 associatedtype Item
 func findById(id: String) -> Item?
 func findAll() -> [Item]
}

func helper(x: Int) -> Int {
 return x * 2
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "UserService.swift");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted.iter().map(|e| (&e.name, &e.entity_type)).collect();
    eprintln!("Swift entities: {:?}", pairs);

    let expected = [
        ("UserService", "class"),
        ("Point", "struct"),
        ("Status", "enum"),
        ("Repository", "protocol"),
        ("helper", "function"),
    ];
    for &(name, kind) in &expected {
        assert!(
            found.contains(&name),
            "Should find {} {}, got: {:?}",
            kind,
            name,
            found
        );
    }
}
288
/// Elixir: module, def, defp, defmacro and defprotocol are reported, and
/// `create_user` carries a parent id.
#[test]
fn test_elixir_entity_extraction() {
    let source = r#"
defmodule MyApp.Accounts do
 def create_user(attrs) do
 %User{}
 |> User.changeset(attrs)
 |> Repo.insert()
 end

 defp validate(attrs) do
 # private helper
 :ok
 end

 defmacro is_admin(user) do
 quote do
 unquote(user).role == :admin
 end
 end

 defguard is_positive(x) when is_integer(x) and x > 0
end

defprotocol Printable do
 def to_string(data)
end

defimpl Printable, for: Integer do
 def to_string(i), do: Integer.to_string(i)
end
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "accounts.ex");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("Elixir entities: {:?}", pairs);

    let expected = [
        ("MyApp.Accounts", "module"),
        ("create_user", "def"),
        ("validate", "defp"),
        ("is_admin", "defmacro"),
        ("Printable", "defprotocol"),
    ];
    for &(name, what) in &expected {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }

    let nested = extracted
        .iter()
        .find(|e| e.name == "create_user")
        .unwrap()
        .parent_id
        .is_some();
    assert!(nested, "create_user should be nested under module");
}
337
/// Bash: both function syntaxes are reported and nothing else is.
#[test]
fn test_bash_entity_extraction() {
    let source = r#"#!/bin/bash

greet() {
 echo "Hello, $1!"
}

function deploy {
 echo "deploying..."
}

# not a function
echo "main script"
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "deploy.sh");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("Bash entities: {:?}", pairs);

    for &(name, what) in &[("greet", "greet()"), ("deploy", "function deploy")] {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }
    assert_eq!(extracted.len(), 2, "Should only find functions, got: {:?}", found);
}
363
/// TypeScript (`.ts`): an exported function and an exported class are reported.
#[test]
fn test_typescript_entity_extraction() {
    let source = r#"
export function hello(): string {
 return "hello";
}

export class Greeter {
 greet(name: string): string {
 return `Hello, ${name}!`;
 }
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "test.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    for &(name, what) in &[("hello", "hello function"), ("Greeter", "Greeter class")] {
        assert!(found.contains(&name), "Should find {}", what);
    }
}
384
/// TypeScript (`.mts`): an exported function is reported.
#[test]
fn test_module_typescript_entity_extraction() {
    let source = r#"
export function hello(): string {
 return "hello";
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "test.mts");
    let has_hello = extracted.iter().map(|e| e.name.as_str()).any(|n| n == "hello");

    assert!(has_hello, "Should find hello function");
}
398
/// TypeScript (`.cts`): an exported class and its method are reported.
#[test]
fn test_commonjs_typescript_entity_extraction() {
    let source = r#"
export class Greeter {
 greet(name: string): string {
 return `Hello, ${name}!`;
 }
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "test.cts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    for &(name, what) in &[("Greeter", "Greeter class"), ("greet", "greet method")] {
        assert!(found.contains(&name), "Should find {}", what);
    }
}
415
/// TypeScript: an exported async generator is reported with type "function".
#[test]
fn test_typescript_generator_function_entity_extraction() {
    let source = r#"
export async function* streamUsers(): AsyncGenerator<string> {
 yield "alice";
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "stream.ts");
    let generator = match extracted.iter().find(|e| e.name == "streamUsers") {
        Some(entity) => entity,
        None => panic!(
            "Should find generator function, got: {:?}",
            extracted
                .iter()
                .map(|e| (&e.name, &e.entity_type))
                .collect::<Vec<_>>()
        ),
    };

    assert_eq!(generator.entity_type, "function");
}
430
/// JavaScript: an exported generator is reported with type "function".
#[test]
fn test_javascript_generator_function_entity_extraction() {
    let source = r#"
export function* ids() {
 yield 1;
 yield 2;
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "ids.js");
    let generator = match extracted.iter().find(|e| e.name == "ids") {
        Some(entity) => entity,
        None => panic!(
            "Should find generator function, got: {:?}",
            extracted
                .iter()
                .map(|e| (&e.name, &e.entity_type))
                .collect::<Vec<_>>()
        ),
    };

    assert_eq!(generator.entity_type, "function");
}
446
/// TypeScript: a function declared inside another is reported with a parent id.
#[test]
fn test_nested_functions_typescript() {
    let source = r#"
function outer() {
 function inner() {
 return 42;
 }
 return inner();
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "nested.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let summary: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
        .collect();
    eprintln!("Nested TS: {:?}", summary);

    for &name in &["outer", "inner"] {
        assert!(found.contains(&name), "Should find {}, got: {:?}", name, found);
    }

    let inner_has_parent = extracted
        .iter()
        .find(|e| e.name == "inner")
        .unwrap()
        .parent_id
        .is_some();
    assert!(inner_has_parent, "inner should have parent_id");
}
468
/// Python: a function defined inside another is reported with a parent id.
#[test]
fn test_nested_functions_python() {
    // Python nesting is expressed purely through indentation, so each level of
    // the fixture is indented by four more spaces than its enclosing block.
    // With every line at the same indent, `def inner():` has no body and
    // `inner` is not nested inside `outer`.
    let code = "def outer():\n    def inner():\n        return 42\n    return inner()\n";
    let plugin = CodeParserPlugin;
    let entities = plugin.extract_entities(code, "nested.py");
    let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();

    assert!(names.contains(&"outer"), "got: {:?}", names);
    assert!(names.contains(&"inner"), "got: {:?}", names);

    let inner = entities.iter().find(|e| e.name == "inner").unwrap();
    assert!(inner.parent_id.is_some(), "inner should have parent_id");
}
482
/// Rust: an `fn` item declared inside another function is reported with a parent id.
#[test]
fn test_nested_functions_rust() {
    let source = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
    let extracted = CodeParserPlugin.extract_entities(source, "nested.rs");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    for expected in &["outer", "inner"] {
        assert!(found.contains(expected), "got: {:?}", found);
    }

    let inner_has_parent = extracted
        .iter()
        .find(|e| e.name == "inner")
        .unwrap()
        .parent_id
        .is_some();
    assert!(inner_has_parent, "inner should have parent_id");
}
496
/// Rust: each `impl Trait for Type` block gets its own "Trait for Type" name.
#[test]
fn test_rust_impl_blocks_unique_names() {
    let source = r#"
trait Greeting {
 fn greet(&self) -> String;
}

struct Person;
struct Robot;
struct Cat;

impl Greeting for Person {
 fn greet(&self) -> String { "Hello".to_string() }
}

impl Greeting for Robot {
 fn greet(&self) -> String { "Beep".to_string() }
}

impl Greeting for Cat {
 fn greet(&self) -> String { "Meow".to_string() }
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "impls.rs");
    let impl_names: Vec<&str> = extracted
        .iter()
        .filter(|e| e.entity_type == "impl")
        .map(|e| e.name.as_str())
        .collect();

    assert_eq!(impl_names.len(), 3, "Should find 3 impl blocks, got: {:?}", impl_names);
    for expected in &["Greeting for Person", "Greeting for Robot", "Greeting for Cat"] {
        assert!(impl_names.contains(expected), "got: {:?}", impl_names);
    }
}
532
/// Go: the top-level function is reported (the fixture declares only a local
/// variable inside it, no inner function).
#[test]
fn test_nested_functions_go() {
    let source = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
    let extracted = CodeParserPlugin.extract_entities(source, "nested.go");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    assert!(found.contains(&"outer"), "got: {:?}", found);
}
543
/// Renaming a function keeps its structural hash but changes its content hash.
#[test]
fn test_renamed_function_same_structural_hash() {
    let extract = |src: &str, path: &str| CodeParserPlugin.extract_entities(src, path);

    let before = extract("def get_card():\n return db.query('cards')\n", "a.py");
    let after = extract("def get_card_1():\n return db.query('cards')\n", "b.py");

    assert_eq!(before.len(), 1, "Should find one entity in a");
    assert_eq!(after.len(), 1, "Should find one entity in b");

    let (old, new) = (&before[0], &after[0]);
    assert_eq!(old.name, "get_card");
    assert_eq!(new.name, "get_card_1");

    assert_eq!(
        old.structural_hash, new.structural_hash,
        "Renamed function with identical body should have same structural_hash"
    );

    assert_ne!(
        old.content_hash, new.content_hash,
        "Content hash should differ since raw content includes the name"
    );
}
570
/// HCL (`.tf`): top-level attributes and blocks are reported with qualified
/// names, nested blocks carry a parent id, and attributes inside blocks are skipped.
#[test]
fn test_hcl_entity_extraction() {
    let source = r#"
region = "eu-west-1"

variable "image_id" {
 type = string
}

resource "aws_instance" "web" {
 ami = var.image_id

 lifecycle {
 create_before_destroy = true
 }
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "main.tf");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let summary: Vec<_> = extracted
        .iter()
        .map(|e| (&e.name, &e.entity_type, &e.parent_id))
        .collect();
    eprintln!("HCL entities: {:?}", summary);

    let present = [
        ("region", "top-level attribute"),
        ("variable.image_id", "variable block"),
        ("resource.aws_instance.web", "resource block"),
        (
            "resource.aws_instance.web.lifecycle",
            "nested lifecycle block with qualified name",
        ),
    ];
    for &(name, what) in &present {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }

    let absent = [
        ("ami", "nested attributes inside blocks"),
        ("create_before_destroy", "nested attributes inside nested blocks"),
    ];
    for &(name, what) in &absent {
        assert!(!found.contains(&name), "Should skip {}, got: {:?}", what, found);
    }

    let lifecycle_has_parent = extracted
        .iter()
        .find(|e| e.name == "resource.aws_instance.web.lifecycle")
        .unwrap()
        .parent_id
        .is_some();
    assert!(lifecycle_has_parent, "lifecycle should be nested under resource");

    let has_attribute = extracted.iter().any(|e| e.entity_type.as_str() == "attribute");
    assert!(
        has_attribute,
        "Should preserve attribute entity type for top-level attributes"
    );
}
616
/// Kotlin: class, interface, object, member functions and a top-level function are reported.
#[test]
fn test_kotlin_entity_extraction() {
    let source = r#"
class UserService {
 val name: String = ""

 fun greet(): String {
 return "Hello, $name"
 }

 companion object {
 fun create(): UserService = UserService()
 }
}

interface Repository {
 fun findById(id: Int): Any?
}

object AppConfig {
 val version = "1.0"
}

fun topLevel(x: Int): Int = x * 2
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "App.kt");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted.iter().map(|e| (&e.name, &e.entity_type)).collect();
    eprintln!("Kotlin entities: {:?}", pairs);

    let expected = [
        "UserService",
        "greet",
        "Repository",
        "findById",
        "AppConfig",
        "topLevel",
    ];
    for name in &expected {
        assert!(found.contains(name), "got: {:?}", found);
    }
}
653
/// XML: the root element and two of its child container elements are reported.
#[test]
fn test_xml_entity_extraction() {
    let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
 <groupId>com.example</groupId>
 <artifactId>my-app</artifactId>
 <dependencies>
 <dependency>
 <groupId>junit</groupId>
 <artifactId>junit</artifactId>
 </dependency>
 </dependencies>
 <build>
 <plugins>
 <plugin>
 <groupId>org.apache.maven</groupId>
 </plugin>
 </plugins>
 </build>
</project>
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "pom.xml");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted.iter().map(|e| (&e.name, &e.entity_type)).collect();
    eprintln!("XML entities: {:?}", pairs);

    for name in &["project", "dependencies", "build"] {
        assert!(found.contains(name), "got: {:?}", found);
    }
}
683
/// TypeScript: inside an arrow callback, classes and functions are still
/// reported, while `const`/`let` locals of the callback are not.
#[test]
fn test_arrow_callback_scope_boundary_typescript() {
    let source = r#"
const activeQueues = [
 { queue: queues.fooQueue, processor: foo.process },
];

activeQueues.forEach((handler: any) => {
 const queue = handler.queue;
 let retries = 0;

 class QueueHandler {
 handle() { return queue; }
 }

 function createHandler() {
 return new QueueHandler();
 }

 queue.process((job) => {
 const orderId = job.data.orderId;
 return orderId;
 });
});

function handleFailure(job: any, err: any) {
 console.error('failed', err);
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "process.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let roots: Vec<&str> = extracted
        .iter()
        .filter(|e| e.parent_id.is_none())
        .map(|e| e.name.as_str())
        .collect();

    // Declarations at file scope.
    for name in &["activeQueues", "handleFailure"] {
        assert!(roots.contains(name), "got: {:?}", roots);
    }

    // Class, method and function declared inside the callback.
    for name in &["QueueHandler", "handle", "createHandler"] {
        assert!(found.contains(name), "got: {:?}", found);
    }

    // Plain locals of the callbacks.
    for name in &["queue", "retries", "orderId"] {
        assert!(!found.contains(name), "got: {:?}", found);
    }
}
739
/// TypeScript: a top-level wrapper function and the class/function declared
/// inside it are all reported.
#[test]
fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
    let source = r#"
function factory() {
 class Foo {
 method(): number {
 return 1;
 }
 }

 function bar(): Foo {
 return new Foo();
 }
}

factory();
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "wrapped.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    let expected = [
        ("factory", "top-level wrapper function"),
        ("Foo", "class inside top-level wrapper"),
        ("bar", "function inside top-level wrapper"),
    ];
    for &(name, what) in &expected {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }
}
776
/// TypeScript: a class and a function declared inside a top-level IIFE are reported.
#[test]
fn test_top_level_iife_still_extracts_typescript_entities() {
    let source = r#"
(() => {
 class Foo {
 method(): number {
 return 1;
 }
 }

 function bar(): Foo {
 return new Foo();
 }
})();
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "iife.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    let expected = [
        ("Foo", "class inside top-level IIFE"),
        ("bar", "function inside top-level IIFE"),
    ];
    for &(name, what) in &expected {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }
}
806
/// TypeScript: a default-exported function is reported, its local `const` is not.
#[test]
fn test_function_locals_not_extracted_as_nested_entities_typescript() {
    let source = r#"
export default function foo() {
 const x = 1;
 return x;
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "default-export.ts");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    let has_foo = found.contains(&"foo");
    assert!(has_foo, "Should find exported function, got: {:?}", found);

    let has_local = found.contains(&"x");
    assert!(
        !has_local,
        "Local inside function should not be extracted as an entity, got: {:?}",
        found
    );
}
829
/// TypeScript: `const` bindings holding function expressions are reported as
/// functions, their locals are not reported, and a named function expression
/// passed as a callback is not a top-level entity.
#[test]
fn test_function_expression_scope_boundary_typescript() {
    let source = r#"
const foo = function namedExpr(x: number) {
 const inner = x + 1;
 return inner;
};

const bar = function(y: number) {
 const local = y * 2;
 return local;
};

const items = [1, 2, 3];

items.forEach(function process(item) {
 const doubled = item * 2;
 console.log(doubled);
});
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "funexpr.ts");
    let roots: Vec<&str> = extracted
        .iter()
        .filter(|e| e.parent_id.is_none())
        .map(|e| e.name.as_str())
        .collect();
    let lookup = |wanted: &str| extracted.iter().find(|e| e.name == wanted).unwrap();
    let everything: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    // File-scope bindings and the entity type each one is given.
    let bindings = [("foo", "function"), ("bar", "function"), ("items", "variable")];
    for &(name, _) in &bindings {
        assert!(roots.contains(&name), "got: {:?}", roots);
    }
    for &(name, kind) in &bindings {
        assert_eq!(lookup(name).entity_type, kind);
    }

    // Locals of the function bodies.
    for name in &["inner", "local", "doubled"] {
        assert!(!everything.contains(name), "got: {:?}", everything);
    }

    assert!(!roots.contains(&"process"), "got: {:?}", roots);
}
879
/// TypeScript: a `const` bound to an arrow function is typed "function"; the
/// class, method and function inside it are reported, its local `const` is not.
#[test]
fn test_variable_assigned_arrow_extracts_inner_entities() {
    let source = r#"
const handler = () => {
 class Inner {
 run() { return 1; }
 }

 function make() {
 return new Inner();
 }

 const local = 42;
};
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "assigned.ts");
    let handler_kind = &extracted
        .iter()
        .find(|e| e.name == "handler")
        .unwrap()
        .entity_type;
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    assert_eq!(handler_kind, "function");
    for name in &["handler", "Inner", "run", "make"] {
        assert!(found.contains(name), "got: {:?}", found);
    }
    assert!(!found.contains(&"local"), "got: {:?}", found);
}
909
/// TypeScript: a `const` bound to a function expression is typed "function";
/// the class and function inside it are reported, its local `const` is not.
#[test]
fn test_variable_assigned_function_expression_extracts_inner_entities() {
    let source = r#"
const handler = function() {
 class Inner {}
 function make() { return new Inner(); }
 const local = 42;
};
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "funexpr-inner.ts");
    let handler_kind = &extracted
        .iter()
        .find(|e| e.name == "handler")
        .unwrap()
        .entity_type;
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();

    assert_eq!(handler_kind, "function");
    for name in &["handler", "Inner", "make"] {
        assert!(found.contains(name), "got: {:?}", found);
    }
    assert!(!found.contains(&"local"), "got: {:?}", found);
}
931
/// TypeScript: a `let` bound to an arrow function keeps the "variable" type.
#[test]
fn test_let_assigned_arrow_stays_variable_typescript() {
    let source = r#"
let handler = () => {
 return 42;
};
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "let-assigned.ts");
    let handler_kind = extracted
        .iter()
        .find(|e| e.name == "handler")
        .map(|e| e.entity_type.as_str())
        .unwrap();

    assert_eq!(handler_kind, "variable");
}
945
/// JavaScript: a `const` bound to an arrow function is typed "function".
#[test]
fn test_const_assigned_arrow_promoted_to_function_javascript() {
    let source = r#"
const handler = () => {
 return 42;
};
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "handler.js");
    let handler_kind = extracted
        .iter()
        .find(|e| e.name == "handler")
        .map(|e| e.entity_type.as_str())
        .unwrap();

    assert_eq!(handler_kind, "function");
}
959
/// Go: a struct type, a package-level `var` and a function are reported.
#[test]
fn test_go_var_declaration() {
    let source = r#"package featuremgmt

type FeatureFlag struct {
 Name string
 Description string
 Stage string
}

var standardFeatureFlags = []FeatureFlag{
 {
 Name: "panelTitleSearch",
 Description: "Search for dashboards using panel title",
 Stage: "PublicPreview",
 },
}

func GetFlags() []FeatureFlag {
 return standardFeatureFlags
}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "flags.go");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("Go entities: {:?}", pairs);

    let expected = [
        ("FeatureFlag", "type"),
        ("standardFeatureFlags", "var"),
        ("GetFlags", "func"),
    ];
    for &(name, keyword) in &expected {
        assert!(
            found.contains(&name),
            "Should find {} {}, got: {:?}",
            keyword,
            name,
            found
        );
    }
}
992
/// Go: members of grouped `var (...)` / `const (...)` declarations are reported.
/// For the `var` group it is enough that either member is found.
#[test]
fn test_go_grouped_var_declaration() {
    let source = r#"package test

var (
 simple = 42
 flags = []string{"a", "b"}
)

const (
 x = 1
 y = 2
)

func main() {}
"#;
    let extracted = CodeParserPlugin.extract_entities(source, "test.go");
    let found: Vec<&str> = extracted.iter().map(|e| e.name.as_str()).collect();
    let pairs: Vec<_> = extracted
        .iter()
        .map(|e| (e.name.as_str(), e.entity_type.as_str()))
        .collect();
    eprintln!("Go grouped entities: {:?}", pairs);

    let any_grouped_var = ["flags", "simple"].iter().any(|name| found.contains(name));
    assert!(any_grouped_var, "Should find grouped var, got: {:?}", found);

    for &(name, what) in &[("x", "grouped const x"), ("main", "func main")] {
        assert!(found.contains(&name), "Should find {}, got: {:?}", what, found);
    }
}
1019
/// Broad Dart extraction check: one fixture containing a class (with field,
/// constructors, method, getter, setter and operator), a mixin, an extension,
/// an enum, a typedef, a top-level function and an extension type. Asserts the
/// names that must be present and the entity type reported for each member.
#[test]
fn test_dart_entity_extraction() {
    let source = r#"
import 'dart:math';

class Calculator {
 final String name;

 Calculator(this.name);

 Calculator.withDefault() : name = 'default';

 factory Calculator.create(String name) {
 return Calculator(name);
 }

 int add(int a, int b) {
 return a + b;
 }

 int get doubleAdd => add(1, 1) * 2;

 set label(String value) {
 // no-op
 }

 int operator +(Calculator other) {
 return 0;
 }
}

mixin Loggable {
 void log(String message) {
 print(message);
 }
}

extension StringExt on String {
 bool get isBlank => trim().isEmpty;
}

enum Status {
 active,
 inactive;

 String display() => name.toUpperCase();
}

typedef Callback = void Function(int);

int add(int a, int b) {
 return a + b;
}

extension type Wrapper(int value) implements int {}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "calculator.dart");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let summary: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.parent_id))
        .collect();
    eprintln!("Dart entities: {:?}", summary);

    // Shorthand helpers: membership in the name list, and first entity by name.
    let has = |wanted: &str| names.contains(&wanted);
    let by_name = |wanted: &str| entities.iter().find(|entity| entity.name == wanted);

    // Top-level declarations.
    assert!(has("Calculator"), "Should find class, got: {:?}", names);
    assert!(has("Loggable"), "Should find mixin, got: {:?}", names);
    assert!(has("StringExt"), "Should find extension, got: {:?}", names);
    assert!(has("Status"), "Should find enum, got: {:?}", names);
    assert!(has("Callback"), "Should find typedef, got: {:?}", names);
    assert!(has("add"), "Should find top-level function, got: {:?}", names);
    assert!(has("Wrapper"), "Should find extension type, got: {:?}", names);

    // `add` exists both at top level and as a member; pick the one with a parent.
    let add_method = entities
        .iter()
        .find(|entity| entity.parent_id.is_some() && entity.name == "add")
        .expect("Should find add method inside Calculator");
    assert_eq!(add_method.entity_type, "method");

    // Constructors: unnamed, named and factory.
    let unnamed_ctor = entities
        .iter()
        .find(|entity| entity.entity_type == "constructor" && entity.name == "Calculator")
        .expect("Should find unnamed constructor");
    let named_ctor = by_name("Calculator.withDefault").unwrap_or_else(|| {
        panic!(
            "Should find named constructor Calculator.withDefault, got: {:?}",
            names
        )
    });
    assert_eq!(named_ctor.entity_type, "constructor");
    assert_ne!(
        unnamed_ctor.id, named_ctor.id,
        "Named and unnamed constructors must have different entity IDs"
    );

    let factory_ctor = by_name("Calculator.create").unwrap_or_else(|| {
        panic!(
            "Should find factory constructor Calculator.create, got: {:?}",
            names
        )
    });
    assert_eq!(factory_ctor.entity_type, "constructor");

    // Accessors and operator.
    let getter = by_name("doubleAdd").expect("Should find getter doubleAdd");
    assert_eq!(getter.entity_type, "getter");

    let setter = by_name("label").expect("Should find setter label");
    assert_eq!(setter.entity_type, "setter");

    let operator = by_name("operator +").expect("Should find operator +");
    assert_eq!(operator.entity_type, "method");

    // Mixin member.
    let log_method = by_name("log").expect("Should find log in Loggable");
    assert!(log_method.parent_id.is_some(), "log should have parent_id");

    // Entity types of the remaining top-level declarations.
    let callback = by_name("Callback").unwrap();
    assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");

    for (name, kind) in [
        ("Loggable", "mixin"),
        ("StringExt", "extension"),
        ("Wrapper", "extension"),
    ] {
        let entity = by_name(name).unwrap();
        assert_eq!(entity.entity_type, kind);
    }
}
1145
/// Checks that the `content` of top-level Dart functions contains their body
/// (block body and `=>` expression body), and that `content_hash` changes only
/// for the function whose body was edited between two versions of the file.
#[test]
fn test_dart_top_level_function_includes_body() {
    let source_v1 = r#"
int add(int a, int b) {
 return a + b;
}

String greet(String name) => 'Hello, $name!';
"#;
    let plugin = CodeParserPlugin;
    let before = plugin.extract_entities(source_v1, "funcs.dart");
    let dump: Vec<_> = before
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.content))
        .collect();
    eprintln!("Dart top-level: {:?}", dump);

    let add_before = before.iter().find(|entity| entity.name == "add").unwrap();
    let add_has_body = add_before.content.contains("return a + b");
    assert!(
        add_has_body,
        "Top-level function content should include the body, got: {:?}",
        add_before.content
    );

    let greet_before = before.iter().find(|entity| entity.name == "greet").unwrap();
    let greet_has_body = greet_before.content.contains("Hello");
    assert!(
        greet_has_body,
        "Expression body should be included, got: {:?}",
        greet_before.content
    );

    // Second version: only the body of `add` differs (`+` became `*`).
    let source_v2 = r#"
int add(int a, int b) {
 return a * b;
}

String greet(String name) => 'Hello, $name!';
"#;
    let after = plugin.extract_entities(source_v2, "funcs.dart");

    let add_after = after.iter().find(|entity| entity.name == "add").unwrap();
    assert_ne!(
        add_before.content_hash, add_after.content_hash,
        "Body change should produce different content_hash"
    );

    let greet_after = after.iter().find(|entity| entity.name == "greet").unwrap();
    assert_eq!(
        greet_before.content_hash, greet_after.content_hash,
        "Unchanged function should keep the same content_hash"
    );
}
1201
/// Two Dart classes that differ only in the name of a named constructor:
/// the constructors must share a `structural_hash` but have different
/// `content_hash` values.
#[test]
fn test_dart_renamed_named_constructor_same_structural_hash() {
    let with_from_json = r#"
class Foo {
 Foo.fromJson(Map<String, dynamic> json) {
 print(json);
 }
}
"#;
    let with_from_map = r#"
class Foo {
 Foo.fromMap(Map<String, dynamic> json) {
 print(json);
 }
}
"#;
    let plugin = CodeParserPlugin;
    let parsed_a = plugin.extract_entities(with_from_json, "a.dart");
    let parsed_b = plugin.extract_entities(with_from_map, "b.dart");

    let from_json = parsed_a
        .iter()
        .find(|entity| entity.name == "Foo.fromJson")
        .unwrap();
    let from_map = parsed_b
        .iter()
        .find(|entity| entity.name == "Foo.fromMap")
        .unwrap();

    assert_eq!(
        from_json.structural_hash, from_map.structural_hash,
        "Renamed named constructor with identical body should have same structural_hash"
    );
    assert_ne!(
        from_json.content_hash, from_map.content_hash,
        "Content hash should differ since raw content includes the name"
    );
}
1234
/// Top-level Dart accessors: the getter must be reported with entity type
/// "getter", carry its body and have no parent; the setter is looked up with
/// entity type "function" and must carry its body as well.
#[test]
fn test_dart_top_level_getter_setter() {
    let source = r#"
int _value = 0;

int get currentValue {
 return _value;
}

set currentValue(int v) {
 _value = v;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "accessors.dart");
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.content))
        .collect();
    eprintln!("Dart top-level accessors: {:?}", dump);

    // Both accessors share the name, so the entity type disambiguates them.
    let accessor = |kind: &str| {
        entities
            .iter()
            .find(|entity| entity.name == "currentValue" && entity.entity_type == kind)
    };

    let getter = accessor("getter").unwrap_or_else(|| {
        panic!(
            "Should find top-level getter, got: {:?}",
            entities
                .iter()
                .map(|entity| (&entity.name, &entity.entity_type))
                .collect::<Vec<_>>()
        )
    });
    let getter_has_body = getter.content.contains("return _value");
    assert!(
        getter_has_body,
        "Top-level getter content should include the body"
    );
    assert!(
        getter.parent_id.is_none(),
        "Top-level getter should have no parent"
    );

    let setter = accessor("function").unwrap_or_else(|| {
        panic!(
            "Should find top-level setter as function, got: {:?}",
            entities
                .iter()
                .map(|entity| (&entity.name, &entity.entity_type))
                .collect::<Vec<_>>()
        )
    });
    let setter_has_body = setter.content.contains("_value = v");
    assert!(
        setter_has_body,
        "Top-level setter content should include the body"
    );
}
1278
/// Dart class fields (`final` and `static const`) must be reported with
/// entity type "field".
#[test]
fn test_dart_field_entity_type() {
    let source = r#"
class Config {
 final String name;
 static const int maxRetries = 3;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "config.dart");
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.parent_id))
        .collect();
    eprintln!("Dart fields: {:?}", dump);

    // `name` is only accepted when it is nested under a parent entity.
    let name_field = entities
        .iter()
        .find(|entity| entity.parent_id.is_some() && entity.name == "name")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'name', got: {:?}",
                entities
                    .iter()
                    .map(|entity| (&entity.name, &entity.entity_type))
                    .collect::<Vec<_>>()
            )
        });
    assert_eq!(name_field.entity_type, "field");

    let max_retries = entities
        .iter()
        .find(|entity| entity.name == "maxRetries")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'maxRetries', got: {:?}",
                entities
                    .iter()
                    .map(|entity| (&entity.name, &entity.entity_type))
                    .collect::<Vec<_>>()
            )
        });
    assert_eq!(max_retries.entity_type, "field");
}
1307
/// Abstract Dart field declarations, including one that declares two names
/// in a single statement (`x, y`): `x` and `label` must be found as "field"
/// entities, and `x` must have a parent.
#[test]
fn test_dart_identifier_list_fields() {
    let source = r#"
abstract class Shape {
 abstract double x, y;
 abstract String label;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "shape.dart");
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.parent_id))
        .collect();
    eprintln!("Dart identifier_list fields: {:?}", dump);

    let x_field = entities
        .iter()
        .find(|entity| entity.name == "x")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'x' from identifier_list, got: {:?}",
                entities
                    .iter()
                    .map(|entity| (&entity.name, &entity.entity_type))
                    .collect::<Vec<_>>()
            )
        });
    assert_eq!(x_field.entity_type, "field");
    assert!(
        x_field.parent_id.is_some(),
        "field 'x' should be nested under Shape"
    );

    let label_field = entities
        .iter()
        .find(|entity| entity.name == "label")
        .unwrap_or_else(|| {
            panic!(
                "Should find field 'label' from single-element identifier_list, got: {:?}",
                entities
                    .iter()
                    .map(|entity| (&entity.name, &entity.entity_type))
                    .collect::<Vec<_>>()
            )
        });
    assert_eq!(label_field.entity_type, "field");
}
1340
/// OCaml implementation file (`.ml`) covering type definitions, an exception,
/// `let` bindings in several forms, a module, a module type, an external,
/// a class and a class type. Each name is looked up and its reported entity
/// type compared against the expected one.
#[test]
fn test_ocaml_entity_extraction() {
    let source = r#"
type color = Red | Green | Blue

type point = {
 x : float;
 y : float;
}

exception Not_found of string

let greet name =
 Printf.printf "Hello, %s!\n" name

let add a b = a + b

let version = "1.0"

let color_to_string = function
 | Red -> "red"
 | Blue -> "blue"

let inc = fun x -> x + 1

module MyModule = struct
 let helper x = x * 2
end

module type Printable = sig
 val to_string : 'a -> string
end

external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"

class point_class x_init = object
 val mutable x = x_init
 method get_x = x
end

class type measurable = object
 method measure : float
end
"#;
    let entities = CodeParserPlugin.extract_entities(source, "example.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("OCaml entities: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    // (entity name, expected entity type), checked in declaration order.
    let expectations = [
        ("color", "type"),
        ("point", "type"),
        ("Not_found", "exception"),
        ("greet", "function"),
        ("add", "function"),
        ("version", "value"),
        ("color_to_string", "function"),
        ("inc", "function"),
        ("MyModule", "module"),
        ("Printable", "module_type"),
        ("caml_input", "external"),
        ("point_class", "class"),
        ("measurable", "class_type"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
1407
/// Nested OCaml modules: checks entity types and that `parent_id` links
/// `x` and `Inner` to `Outer`, and `y` to `Inner`.
#[test]
fn test_ocaml_nested_module_entities() {
    let source = r#"
module Outer = struct
 let x = 42

 module Inner = struct
 let y = 0
 end
end
"#;
    let entities = CodeParserPlugin.extract_entities(source, "nested.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.parent_id))
        .collect();
    eprintln!("OCaml nested: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    let outer = lookup("Outer");
    let x = lookup("x");
    let inner = lookup("Inner");
    let y = lookup("y");

    assert_eq!(outer.entity_type, "module");
    assert_eq!(x.entity_type, "value");
    assert_eq!(inner.entity_type, "module");
    assert_eq!(y.entity_type, "value");

    // Parent links must point at the id of the enclosing module.
    assert!(
        matches!(&x.parent_id, Some(parent) if parent == &outer.id),
        "x should be nested under Outer"
    );
    assert!(
        matches!(&inner.parent_id, Some(parent) if parent == &outer.id),
        "Inner should be nested under Outer"
    );
    assert!(
        matches!(&y.parent_id, Some(parent) if parent == &inner.id),
        "y should be nested under Inner"
    );
}
1441
/// OCaml interface file (`.mli`): an abstract type, `val` signatures, an
/// exception and a module type must each be found with the expected entity type.
#[test]
fn test_ocaml_interface_entity_extraction() {
    let source = r#"
type t

val create : string -> t
val to_string : t -> string

exception Invalid_input of string

module type Serializable = sig
 val serialize : t -> string
end
"#;
    let entities = CodeParserPlugin.extract_entities(source, "example.mli");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("OCaml interface entities: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    let expectations = [
        ("t", "type"),
        ("create", "val"),
        ("to_string", "val"),
        ("Invalid_input", "exception"),
        ("Serializable", "module_type"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
1470
/// `let rec ... and ...` chains: every binding in the chain, not only the
/// first, must be found and reported as a "function".
#[test]
fn test_ocaml_mutual_recursion_let() {
    let source = r#"
let rec even n = (n = 0) || odd (n - 1)
and odd n = (n <> 0) && even (n - 1)

let rec ping x = pong (x - 1)
and pong x = if x <= 0 then 0 else ping (x - 1)
"#;
    let entities = CodeParserPlugin.extract_entities(source, "mutual.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("OCaml mutual let: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    for name in ["even", "odd", "ping", "pong"] {
        assert_eq!(lookup(name).entity_type, "function");
    }
}
1493
/// `module rec A ... and B ...`: both modules must be found as "module", and
/// the value defined inside each must have that module's id as `parent_id`.
#[test]
fn test_ocaml_mutual_recursion_module() {
    let source = r#"
module rec A : sig val x : int end = struct
 let x = B.y + 1
end
and B : sig val y : int end = struct
 let y = 0
end
"#;
    let entities = CodeParserPlugin.extract_entities(source, "mutual_mod.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type, &entity.parent_id))
        .collect();
    eprintln!("OCaml mutual module: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    let module_a = lookup("A");
    let module_b = lookup("B");
    assert_eq!(module_a.entity_type, "module");
    assert_eq!(module_b.entity_type, "module");

    let x = lookup("x");
    let y = lookup("y");
    assert!(
        matches!(&x.parent_id, Some(parent) if parent == &module_a.id),
        "x should be nested under A"
    );
    assert!(
        matches!(&y.parent_id, Some(parent) if parent == &module_b.id),
        "y should be nested under B"
    );
}
1522
/// Destructuring `let` bindings (tuple and record patterns) plus a plain
/// binding: every bound name must be found and reported as a "value".
#[test]
fn test_ocaml_destructured_let() {
    let source = r#"
let (a, b) = (1, 2)

let { x; y } = point

let simple = 42
"#;
    let entities = CodeParserPlugin.extract_entities(source, "destruct.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("OCaml destructured: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    for name in ["a", "b", "x", "y", "simple"] {
        assert_eq!(lookup(name).entity_type, "value");
    }
}
1546
/// `class ... and ...`: both classes in the chain must be found and reported
/// as "class".
#[test]
fn test_ocaml_mutual_recursion_class() {
    let source = r#"
class foo = object
 method x = 1
end
and bar = object
 method y = 2
end
"#;
    let entities = CodeParserPlugin.extract_entities(source, "classes.ml");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("OCaml mutual class: {:?}", dump);

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    for name in ["foo", "bar"] {
        assert_eq!(lookup(name).entity_type, "class");
    }
}
1568
/// Perl module (`.pm`): the `package` declaration and both `sub`s must be
/// found, as "package" and "function" respectively.
#[test]
fn test_perl_entity_extraction() {
    let source = r#"package Foo::Bar;

use strict;
use warnings;

sub hello {
 my ($self, $name) = @_;
 print "Hello, $name!\n";
}

sub _private_helper {
 return 42;
}

1;
"#;
    let entities = CodeParserPlugin.extract_entities(source, "Foo/Bar.pm");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();

    // Presence first, so a missing name fails with the full list of names.
    for wanted in ["Foo::Bar", "hello", "_private_helper"] {
        assert!(names.contains(&wanted), "got: {:?}", names);
    }

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    let expectations = [
        ("Foo::Bar", "package"),
        ("hello", "function"),
        ("_private_helper", "function"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }
}
1602
/// Fortran (`.f90`): a module containing a function and a subroutine, plus a
/// program unit. Checks presence, entity types, and that the two contained
/// procedures have a parent.
#[test]
fn test_fortran_entity_extraction() {
    let source = r#"module math_utils
 implicit none
contains
 function add(a, b) result(c)
 integer, intent(in) :: a, b
 integer :: c
 c = a + b
 end function add

 subroutine greet()
 print *, "hello"
 end subroutine greet
end module math_utils

program main
 implicit none
 print *, "hello"
end program main
"#;
    let entities = CodeParserPlugin.extract_entities(source, "test.f90");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();

    // Presence first, so a missing name fails with the full list of names.
    for wanted in ["math_utils", "add", "greet", "main"] {
        assert!(names.contains(&wanted), "got: {:?}", names);
    }

    // First entity with the given name; panics with the full name list if absent.
    let lookup = |wanted: &str| {
        entities
            .iter()
            .find(|entity| entity.name == wanted)
            .unwrap_or_else(|| panic!("Should find {}, got: {:?}", wanted, names))
    };

    let expectations = [
        ("math_utils", "module"),
        ("add", "function"),
        ("greet", "subroutine"),
        ("main", "program"),
    ];
    for (name, kind) in expectations {
        assert_eq!(lookup(name).entity_type, kind);
    }

    // Procedures declared after `contains` must be attached to some parent.
    for nested in ["add", "greet"] {
        assert!(lookup(nested).parent_id.is_some());
    }
}
1645
/// Scala 2 style source: a class with methods, a companion object, a trait,
/// a case class and a type alias. Asserts that the class, the trait and two
/// methods are found, and that `getUsers` has a parent.
#[test]
fn test_scala_entity_extraction() {
    let source = r#"
package com.example

import scala.collection.mutable

class UserService(val name: String) {
 def getUsers(): List[User] = db.findAll()

 def createUser(user: User): Unit = db.save(user)

 private def validate(user: User): Boolean = true
}

object UserService {
 def apply(name: String): UserService = new UserService(name)

 val DefaultName: String = "default"
}

trait Repository[T] {
 def findById(id: String): Option[T]
 def findAll(): List[T]
}

case class User(id: String, name: String)

type UserId = String
"#;
    let entities = CodeParserPlugin.extract_entities(source, "UserService.scala");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("Scala entities: {:?}", dump);

    let has = |wanted: &str| names.contains(&wanted);
    assert!(
        has("UserService"),
        "Should find class UserService, got: {:?}",
        names
    );
    assert!(
        has("Repository"),
        "Should find trait Repository, got: {:?}",
        names
    );
    assert!(
        has("getUsers"),
        "Should find method getUsers, got: {:?}",
        names
    );
    assert!(
        has("createUser"),
        "Should find method createUser, got: {:?}",
        names
    );

    let get_users = entities
        .iter()
        .find(|entity| entity.name == "getUsers")
        .unwrap();
    let nested = get_users.parent_id.is_some();
    assert!(nested, "getUsers should have parent_id");
}
1690
/// Scala 3 syntax (colon-delimited bodies, `enum`, `given`, `extension`):
/// asserts that both enums, the object, the trait and the type alias are
/// found by name.
#[test]
fn test_scala3_entity_extraction() {
    let source = r#"
package com.example

enum Color:
 case Red, Green, Blue

enum Planet(mass: Double, radius: Double):
 case Mercury extends Planet(3.303e+23, 2.4397e6)
 case Venus extends Planet(4.869e+24, 6.0518e6)

object Main:
 def main(args: Array[String]): Unit =
 println("Hello, World!")

trait Greeter:
 def greet(name: String): String

given Greeter with
 def greet(name: String): String = s"Hello, $name!"

extension (s: String)
 def shout: String = s.toUpperCase + "!"

type Predicate[A] = A => Boolean
"#;
    let entities = CodeParserPlugin.extract_entities(source, "Main.scala");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    let dump: Vec<_> = entities
        .iter()
        .map(|entity| (&entity.name, &entity.entity_type))
        .collect();
    eprintln!("Scala 3 entities: {:?}", dump);

    let has = |wanted: &str| names.contains(&wanted);
    assert!(has("Color"), "Should find enum Color, got: {:?}", names);
    assert!(has("Planet"), "Should find enum Planet, got: {:?}", names);
    assert!(has("Main"), "Should find object Main, got: {:?}", names);
    assert!(
        has("Greeter"),
        "Should find trait Greeter, got: {:?}",
        names
    );
    assert!(
        has("Predicate"),
        "Should find type alias Predicate, got: {:?}",
        names
    );
}
1729
/// Zig source with struct/enum constants, functions and a `test` block:
/// asserts that the functions and container constants are found, then checks
/// the entity type reported for five of them.
#[test]
fn test_zig_entity_extraction() {
    let source = r#"
const std = @import("std");

pub const Point = struct {
 x: i32,
 y: i32,
};

pub const Color = enum {
 red,
 green,
 blue,
};

const Person = struct {
 name: []const u8,
 age: u32,
};

pub fn greet(name: []const u8) void {
 std.debug.print("Hello, {s}!\n", .{name});
}

fn add(a: i32, b: i32) i32 {
 return a + b;
}

pub fn main() !void {
 greet("world");
}

test "basic addition" {
 const result = add(2, 3);
 _ = result;
}
"#;
    let entities = CodeParserPlugin.extract_entities(source, "main.zig");
    let names: Vec<&str> = entities.iter().map(|entity| entity.name.as_str()).collect();
    // Name -> entity type; a later entity with the same name overwrites an earlier one.
    let kind_of: std::collections::HashMap<&str, &str> = entities
        .iter()
        .map(|entity| (entity.name.as_str(), entity.entity_type.as_str()))
        .collect();

    let has = |wanted: &str| names.contains(&wanted);
    assert!(has("greet"), "Should find greet, got: {:?}", names);
    assert!(has("add"), "Should find add, got: {:?}", names);
    assert!(has("main"), "Should find main, got: {:?}", names);
    assert!(has("Point"), "Should find Point, got: {:?}", names);
    assert!(has("Color"), "Should find Color, got: {:?}", names);
    assert!(has("Person"), "Should find Person, got: {:?}", names);

    let expectations = [
        ("greet", "function"),
        ("add", "function"),
        ("Point", "struct"),
        ("Color", "enum"),
        ("Person", "struct"),
    ];
    for (name, kind) in expectations {
        assert_eq!(kind_of[name], kind);
    }
}
1789}