1mod entity_extractor;
2pub mod languages;
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6
7use crate::model::entity::SemanticEntity;
8use crate::parser::plugin::SemanticParserPlugin;
9use languages::{get_all_code_extensions, get_language_config};
10use entity_extractor::extract_entities;
11
/// Parser plugin for source-code files.
///
/// Its `SemanticParserPlugin` implementation parses file content with
/// tree-sitter and turns the resulting syntax tree into `SemanticEntity` values.
pub struct CodeParserPlugin;
13
thread_local! {
    // Per-thread cache of tree-sitter parsers. Entries are keyed by the
    // language config id and are created the first time a language is parsed
    // on the thread, so later extractions for that language reuse the parser.
    static PARSER_CACHE: RefCell<HashMap<&'static str, tree_sitter::Parser>> = RefCell::new(HashMap::new());
}
19
20impl SemanticParserPlugin for CodeParserPlugin {
21 fn id(&self) -> &str {
22 "code"
23 }
24
25 fn extensions(&self) -> &[&str] {
26 get_all_code_extensions()
27 }
28
29 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
30 self.extract_entities_with_tree(content, file_path).0
31 }
32
33 fn extract_entities_with_tree(
34 &self,
35 content: &str,
36 file_path: &str,
37 ) -> (Vec<SemanticEntity>, Option<tree_sitter::Tree>) {
38 let ext = std::path::Path::new(file_path)
39 .extension()
40 .and_then(|e| e.to_str())
41 .map(|e| format!(".{}", e.to_lowercase()))
42 .unwrap_or_default();
43
44 let config = match get_language_config(&ext) {
45 Some(c) => c,
46 None => {
47 match detect_ext_from_content(content)
49 .and_then(|se| get_language_config(&se))
50 {
51 Some(c) => c,
52 None => return (Vec::new(), None),
53 }
54 }
55 };
56
57 let language = match (config.get_language)() {
58 Some(lang) => lang,
59 None => return (Vec::new(), None),
60 };
61
62 PARSER_CACHE.with(|cache| {
63 let mut cache = cache.borrow_mut();
64 let parser = cache.entry(config.id).or_insert_with(|| {
65 let mut p = tree_sitter::Parser::new();
66 let _ = p.set_language(&language);
67 p
68 });
69
70 let tree = match parser.parse(content.as_bytes(), None) {
71 Some(t) => t,
72 None => return (Vec::new(), None),
73 };
74
75 let entities = extract_entities(&tree, file_path, config, content);
76 (entities, Some(tree))
77 })
78 }
79}
80
81use crate::parser::registry::detect_ext_from_content;
82
83#[cfg(test)]
84mod tests {
85 use super::*;
86
87 #[test]
88 fn test_java_entity_extraction() {
89 let code = r#"
90package com.example;
91
92import java.util.List;
93
94public class UserService {
95 private String name;
96
97 public UserService(String name) {
98 this.name = name;
99 }
100
101 public List<User> getUsers() {
102 return db.findAll();
103 }
104
105 public void createUser(User user) {
106 db.save(user);
107 }
108}
109
110interface Repository<T> {
111 T findById(String id);
112 List<T> findAll();
113}
114
115enum Status {
116 ACTIVE,
117 INACTIVE,
118 DELETED
119}
120"#;
121 let plugin = CodeParserPlugin;
122 let entities = plugin.extract_entities(code, "UserService.java");
123 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
124 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
125 eprintln!("Java entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
126
127 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
128 assert!(names.contains(&"Repository"), "Should find interface Repository, got: {:?}", names);
129 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
130 }
131
132 #[test]
133 fn test_java_nested_methods() {
134 let code = r#"
135public class Calculator {
136 public int add(int a, int b) {
137 return a + b;
138 }
139
140 public int subtract(int a, int b) {
141 return a - b;
142 }
143}
144"#;
145 let plugin = CodeParserPlugin;
146 let entities = plugin.extract_entities(code, "Calculator.java");
147 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
148 eprintln!("Java nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
149
150 assert!(names.contains(&"Calculator"), "Should find Calculator class");
151 assert!(names.contains(&"add"), "Should find add method, got: {:?}", names);
152 assert!(names.contains(&"subtract"), "Should find subtract method, got: {:?}", names);
153
154 let add = entities.iter().find(|e| e.name == "add").unwrap();
156 assert!(add.parent_id.is_some(), "add should have parent_id");
157 }
158
159 #[test]
160 fn test_c_entity_extraction() {
161 let code = r#"
162#include <stdio.h>
163
164struct Point {
165 int x;
166 int y;
167};
168
169enum Color {
170 RED,
171 GREEN,
172 BLUE
173};
174
175typedef struct {
176 char name[50];
177 int age;
178} Person;
179
180void greet(const char* name) {
181 printf("Hello, %s!\n", name);
182}
183
184int add(int a, int b) {
185 return a + b;
186}
187
188int main() {
189 greet("world");
190 return 0;
191}
192"#;
193 let plugin = CodeParserPlugin;
194 let entities = plugin.extract_entities(code, "main.c");
195 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
196 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
197 eprintln!("C entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
198
199 assert!(names.contains(&"greet"), "Should find greet function, got: {:?}", names);
200 assert!(names.contains(&"add"), "Should find add function, got: {:?}", names);
201 assert!(names.contains(&"main"), "Should find main function, got: {:?}", names);
202 assert!(names.contains(&"Point"), "Should find Point struct, got: {:?}", names);
203 assert!(names.contains(&"Color"), "Should find Color enum, got: {:?}", names);
204 }
205
206 #[test]
207 fn test_c_function_locals_not_extracted() {
208 let code = r#"
209int global_count = 0;
210int helper(void);
211
212int main(void) {
213 int local = helper();
214 const char *message = "hello";
215 return local + global_count;
216}
217"#;
218 let plugin = CodeParserPlugin;
219 let entities = plugin.extract_entities(code, "main.c");
220 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
221
222 assert!(names.contains(&"global_count"), "got: {:?}", names);
223 assert!(names.contains(&"helper"), "got: {:?}", names);
224 assert!(names.contains(&"main"), "got: {:?}", names);
225 assert!(!names.contains(&"local"), "got: {:?}", names);
226 assert!(!names.contains(&"message"), "got: {:?}", names);
227 }
228
229 #[test]
230 fn test_cpp_entity_extraction() {
231 let code = "namespace math {\nclass Vector3 {\npublic:\n float length() const { return 0; }\n};\n}\nvoid greet() {}\n";
232 let plugin = CodeParserPlugin;
233 let entities = plugin.extract_entities(code, "main.cpp");
234 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
235 assert!(names.contains(&"math"), "got: {:?}", names);
236 assert!(names.contains(&"Vector3"), "got: {:?}", names);
237 assert!(names.contains(&"greet"), "got: {:?}", names);
238 }
239
240 #[test]
241 fn test_cpp_function_locals_not_extracted() {
242 let code = r#"
243int global_value = 1;
244int helper();
245
246int main() {
247 int local = helper();
248 auto lambda = []() {
249 int lambda_local = 3;
250 return lambda_local;
251 };
252 return local + lambda();
253}
254"#;
255 let plugin = CodeParserPlugin;
256 let entities = plugin.extract_entities(code, "main.cpp");
257 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
258
259 assert!(names.contains(&"global_value"), "got: {:?}", names);
260 assert!(names.contains(&"helper"), "got: {:?}", names);
261 assert!(names.contains(&"main"), "got: {:?}", names);
262 assert!(!names.contains(&"local"), "got: {:?}", names);
263 assert!(!names.contains(&"lambda"), "got: {:?}", names);
264 assert!(!names.contains(&"lambda_local"), "got: {:?}", names);
265 }
266
267 #[test]
268 fn test_ruby_entity_extraction() {
269 let code = "module Auth\n class User\n def greet\n \"hi\"\n end\n end\nend\ndef helper(x)\n x * 2\nend\n";
270 let plugin = CodeParserPlugin;
271 let entities = plugin.extract_entities(code, "auth.rb");
272 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
273 assert!(names.contains(&"Auth"), "got: {:?}", names);
274 assert!(names.contains(&"User"), "got: {:?}", names);
275 assert!(names.contains(&"helper"), "got: {:?}", names);
276 }
277
278 #[test]
279 fn test_csharp_entity_extraction() {
280 let code = "namespace MyApp {\npublic class User {\n public string GetName() { return \"\"; }\n}\npublic enum Role { Admin, User }\n}\n";
281 let plugin = CodeParserPlugin;
282 let entities = plugin.extract_entities(code, "Models.cs");
283 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
284 assert!(names.contains(&"MyApp"), "got: {:?}", names);
285 assert!(names.contains(&"User"), "got: {:?}", names);
286 assert!(names.contains(&"Role"), "got: {:?}", names);
287 }
288
289 #[test]
290 fn test_swift_entity_extraction() {
291 let code = r#"
292import Foundation
293
294class UserService {
295 var name: String
296
297 init(name: String) {
298 self.name = name
299 }
300
301 func getUsers() -> [User] {
302 return db.findAll()
303 }
304}
305
306struct Point {
307 var x: Double
308 var y: Double
309}
310
311enum Status {
312 case active
313 case inactive
314 case deleted
315}
316
317protocol Repository {
318 associatedtype Item
319 func findById(id: String) -> Item?
320 func findAll() -> [Item]
321}
322
323func helper(x: Int) -> Int {
324 return x * 2
325}
326"#;
327 let plugin = CodeParserPlugin;
328 let entities = plugin.extract_entities(code, "UserService.swift");
329 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
330 eprintln!("Swift entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
331
332 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
333 assert!(names.contains(&"Point"), "Should find struct Point, got: {:?}", names);
334 assert!(names.contains(&"Status"), "Should find enum Status, got: {:?}", names);
335 assert!(names.contains(&"Repository"), "Should find protocol Repository, got: {:?}", names);
336 assert!(names.contains(&"helper"), "Should find function helper, got: {:?}", names);
337
338 let point = entities.iter().find(|e| e.name == "Point").unwrap();
339 assert_eq!(point.entity_type, "struct");
340
341 let status = entities.iter().find(|e| e.name == "Status").unwrap();
342 assert_eq!(status.entity_type, "enum");
343 }
344
345 #[test]
346 fn test_swift_conditional_compilation_inside_struct() {
347 let code = r#"
348import ArgumentParser
349
350public struct TuistCommand: AsyncParsableCommand {
351 public init() {}
352
353 public static var configuration: CommandConfiguration {
354 let comment = "brace in string }"
355 let multiline = """
356 brace in multiline }
357 escaped \"""
358 """
359 /* brace in comment } */
360 CommandConfiguration(commandName: "tuist")
361 }
362
363 #if os(macOS)
364 public static var groupedSubcommands: [ParsableCommand.Type] {
365 [InstallCommand.self]
366 }
367 #else
368 public static var groupedSubcommands: [ParsableCommand.Type] {
369 []
370 }
371 #endif
372
373 public func run() async throws {}
374}
375"#;
376 let plugin = CodeParserPlugin;
377 let entities = plugin.extract_entities(code, "TuistCommand.swift");
378 eprintln!(
379 "Swift conditional entities: {:?}",
380 entities
381 .iter()
382 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
383 .collect::<Vec<_>>()
384 );
385
386 let command = entities
387 .iter()
388 .find(|e| e.name == "TuistCommand")
389 .expect("Should recover TuistCommand struct");
390 assert_eq!(command.entity_type, "struct");
391 assert!(command.parent_id.is_none());
392
393 let renamed_code = code.replace("TuistCommand", "RenamedCommand");
394 let renamed_entities = plugin.extract_entities(&renamed_code, "TuistCommand.swift");
395 let renamed_command = renamed_entities
396 .iter()
397 .find(|e| e.name == "RenamedCommand")
398 .expect("Should recover renamed command struct");
399 assert_eq!(command.structural_hash, renamed_command.structural_hash);
400
401 for member in ["init", "configuration", "run"] {
402 let entity = entities
403 .iter()
404 .find(|e| e.name == member)
405 .unwrap_or_else(|| panic!("Should find {member}"));
406 assert_eq!(entity.parent_id.as_deref(), Some(command.id.as_str()));
407 }
408
409 let grouped_subcommands: Vec<_> = entities
410 .iter()
411 .filter(|e| e.name == "groupedSubcommands")
412 .collect();
413 assert_eq!(grouped_subcommands.len(), 2);
414 assert!(grouped_subcommands
415 .iter()
416 .all(|entity| entity.parent_id.as_deref() == Some(command.id.as_str())));
417 }
418
419 #[test]
420 fn test_elixir_entity_extraction() {
421 let code = r#"
422defmodule MyApp.Accounts do
423 def create_user(attrs) do
424 %User{}
425 |> User.changeset(attrs)
426 |> Repo.insert()
427 end
428
429 defp validate(attrs) do
430 # private helper
431 :ok
432 end
433
434 defmacro is_admin(user) do
435 quote do
436 unquote(user).role == :admin
437 end
438 end
439
440 defguard is_positive(x) when is_integer(x) and x > 0
441end
442
443defprotocol Printable do
444 def to_string(data)
445end
446
447defimpl Printable, for: Integer do
448 def to_string(i), do: Integer.to_string(i)
449end
450"#;
451 let plugin = CodeParserPlugin;
452 let entities = plugin.extract_entities(code, "accounts.ex");
453 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
454 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
455 eprintln!("Elixir entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
456
457 assert!(names.contains(&"MyApp.Accounts"), "Should find module, got: {:?}", names);
458 assert!(names.contains(&"create_user"), "Should find def, got: {:?}", names);
459 assert!(names.contains(&"validate"), "Should find defp, got: {:?}", names);
460 assert!(names.contains(&"is_admin"), "Should find defmacro, got: {:?}", names);
461 assert!(names.contains(&"Printable"), "Should find defprotocol, got: {:?}", names);
462
463 let create_user = entities.iter().find(|e| e.name == "create_user").unwrap();
465 assert!(create_user.parent_id.is_some(), "create_user should be nested under module");
466 }
467
468 #[test]
469 fn test_bash_entity_extraction() {
470 let code = r#"#!/bin/bash
471
472greet() {
473 echo "Hello, $1!"
474}
475
476function deploy {
477 echo "deploying..."
478}
479
480# not a function
481echo "main script"
482"#;
483 let plugin = CodeParserPlugin;
484 let entities = plugin.extract_entities(code, "deploy.sh");
485 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
486 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
487 eprintln!("Bash entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
488
489 assert!(names.contains(&"greet"), "Should find greet(), got: {:?}", names);
490 assert!(names.contains(&"deploy"), "Should find function deploy, got: {:?}", names);
491 assert_eq!(entities.len(), 2, "Should only find functions, got: {:?}", names);
492 }
493
494 #[test]
495 fn test_typescript_entity_extraction() {
496 let code = r#"
498export function hello(): string {
499 return "hello";
500}
501
502export class Greeter {
503 greet(name: string): string {
504 return `Hello, ${name}!`;
505 }
506}
507"#;
508 let plugin = CodeParserPlugin;
509 let entities = plugin.extract_entities(code, "test.ts");
510 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
511 assert!(names.contains(&"hello"), "Should find hello function");
512 assert!(names.contains(&"Greeter"), "Should find Greeter class");
513 }
514
515 #[test]
516 fn test_module_typescript_entity_extraction() {
517 let code = r#"
518export function hello(): string {
519 return "hello";
520}
521"#;
522 let plugin = CodeParserPlugin;
523 let entities = plugin.extract_entities(code, "test.mts");
524 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
525
526 assert!(names.contains(&"hello"), "Should find hello function");
527 }
528
529 #[test]
530 fn test_commonjs_typescript_entity_extraction() {
531 let code = r#"
532export class Greeter {
533 greet(name: string): string {
534 return `Hello, ${name}!`;
535 }
536}
537"#;
538 let plugin = CodeParserPlugin;
539 let entities = plugin.extract_entities(code, "test.cts");
540 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
541
542 assert!(names.contains(&"Greeter"), "Should find Greeter class");
543 assert!(names.contains(&"greet"), "Should find greet method");
544 }
545
546 #[test]
547 fn test_typescript_generator_function_entity_extraction() {
548 let code = r#"
549export async function* streamUsers(): AsyncGenerator<string> {
550 yield "alice";
551}
552"#;
553 let plugin = CodeParserPlugin;
554 let entities = plugin.extract_entities(code, "stream.ts");
555 let stream = entities.iter().find(|e| e.name == "streamUsers");
556
557 assert!(stream.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
558 assert_eq!(stream.unwrap().entity_type, "function");
559 }
560
561 #[test]
562 fn test_javascript_generator_function_entity_extraction() {
563 let code = r#"
564export function* ids() {
565 yield 1;
566 yield 2;
567}
568"#;
569 let plugin = CodeParserPlugin;
570 let entities = plugin.extract_entities(code, "ids.js");
571 let ids = entities.iter().find(|e| e.name == "ids");
572
573 assert!(ids.is_some(), "Should find generator function, got: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
574 assert_eq!(ids.unwrap().entity_type, "function");
575 }
576
577 #[test]
578 fn test_nested_functions_typescript() {
579 let code = r#"
580function outer() {
581 function inner() {
582 return 42;
583 }
584 return inner();
585}
586"#;
587 let plugin = CodeParserPlugin;
588 let entities = plugin.extract_entities(code, "nested.ts");
589 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
590 eprintln!("Nested TS: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
591
592 assert!(names.contains(&"outer"), "Should find outer, got: {:?}", names);
593 assert!(names.contains(&"inner"), "Should find inner, got: {:?}", names);
594
595 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
596 assert!(inner.parent_id.is_some(), "inner should have parent_id");
597 }
598
599 #[test]
600 fn test_typescript_nested_anonymous_class_fields() {
601 let code = r#"
602class L1 {
603 L2 = class {
604 L3 = class {
605 L4 = class {
606 method() { return 1; }
607 };
608 };
609 };
610}
611"#;
612 let plugin = CodeParserPlugin;
613 let entities = plugin.extract_entities(code, "a.ts");
614 let find = |name: &str| {
615 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
616 panic!(
617 "missing {name}; got: {:?}",
618 entities
619 .iter()
620 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
621 .collect::<Vec<_>>()
622 )
623 })
624 };
625
626 let l1 = find("L1");
627 assert_eq!(l1.entity_type, "class");
628 let l1_id = l1.id.clone();
629
630 let l2 = find("L2");
631 assert_eq!(l2.entity_type, "field");
632 assert_eq!(l2.parent_id.as_deref(), Some(l1_id.as_str()));
633 let l2_id = l2.id.clone();
634
635 let l3 = find("L3");
636 assert_eq!(l3.entity_type, "field");
637 assert_eq!(l3.parent_id.as_deref(), Some(l2_id.as_str()));
638 let l3_id = l3.id.clone();
639
640 let l4 = find("L4");
641 assert_eq!(l4.entity_type, "field");
642 assert_eq!(l4.parent_id.as_deref(), Some(l3_id.as_str()));
643 let l4_id = l4.id.clone();
644
645 let method = find("method");
646 assert_eq!(method.entity_type, "method");
647 assert_eq!(method.parent_id.as_deref(), Some(l4_id.as_str()));
648 assert_eq!(method.id, "a.ts::class::L1::L2::L3::L4::method");
649 }
650
651 #[test]
652 fn test_nested_functions_python() {
653 let code = "def outer():\n def inner():\n return 42\n return inner()\n";
654 let plugin = CodeParserPlugin;
655 let entities = plugin.extract_entities(code, "nested.py");
656 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
657
658 assert!(names.contains(&"outer"), "got: {:?}", names);
659 assert!(names.contains(&"inner"), "got: {:?}", names);
660
661 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
662 assert!(inner.parent_id.is_some(), "inner should have parent_id");
663 }
664
665 #[test]
666 fn test_nested_functions_rust() {
667 let code = "fn outer() {\n fn inner() -> i32 {\n 42\n }\n inner();\n}\n";
668 let plugin = CodeParserPlugin;
669 let entities = plugin.extract_entities(code, "nested.rs");
670 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
671
672 assert!(names.contains(&"outer"), "got: {:?}", names);
673 assert!(names.contains(&"inner"), "got: {:?}", names);
674
675 let inner = entities.iter().find(|e| e.name == "inner").unwrap();
676 assert!(inner.parent_id.is_some(), "inner should have parent_id");
677 }
678
679 #[test]
680 fn test_rust_impl_blocks_unique_names() {
681 let code = r#"
682trait Greeting {
683 fn greet(&self) -> String;
684}
685
686struct Person;
687struct Robot;
688struct Cat;
689
690impl Greeting for Person {
691 fn greet(&self) -> String { "Hello".to_string() }
692}
693
694impl Greeting for Robot {
695 fn greet(&self) -> String { "Beep".to_string() }
696}
697
698impl Greeting for Cat {
699 fn greet(&self) -> String { "Meow".to_string() }
700}
701"#;
702 let plugin = CodeParserPlugin;
703 let entities = plugin.extract_entities(code, "impls.rs");
704 let impl_entities: Vec<&_> = entities.iter()
705 .filter(|e| e.entity_type == "impl")
706 .collect();
707 let names: Vec<&str> = impl_entities.iter().map(|e| e.name.as_str()).collect();
708
709 assert_eq!(impl_entities.len(), 3, "Should find 3 impl blocks, got: {:?}", names);
710 assert!(names.contains(&"Greeting for Person"), "got: {:?}", names);
711 assert!(names.contains(&"Greeting for Robot"), "got: {:?}", names);
712 assert!(names.contains(&"Greeting for Cat"), "got: {:?}", names);
713 }
714
715 #[test]
716 fn test_nested_functions_go() {
717 let code = "package main\n\nfunc outer() {\n var x int = 42\n _ = x\n}\n";
719 let plugin = CodeParserPlugin;
720 let entities = plugin.extract_entities(code, "nested.go");
721 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
722
723 assert!(names.contains(&"outer"), "got: {:?}", names);
724 }
725
726 #[test]
727 fn test_renamed_function_same_structural_hash() {
728 let code_a = "def get_card():\n return db.query('cards')\n";
729 let code_b = "def get_card_1():\n return db.query('cards')\n";
730
731 let plugin = CodeParserPlugin;
732 let entities_a = plugin.extract_entities(code_a, "a.py");
733 let entities_b = plugin.extract_entities(code_b, "b.py");
734
735 assert_eq!(entities_a.len(), 1, "Should find one entity in a");
736 assert_eq!(entities_b.len(), 1, "Should find one entity in b");
737 assert_eq!(entities_a[0].name, "get_card");
738 assert_eq!(entities_b[0].name, "get_card_1");
739
740 assert_eq!(
742 entities_a[0].structural_hash, entities_b[0].structural_hash,
743 "Renamed function with identical body should have same structural_hash"
744 );
745
746 assert_ne!(
748 entities_a[0].content_hash, entities_b[0].content_hash,
749 "Content hash should differ since raw content includes the name"
750 );
751 }
752
753 #[test]
754 fn test_hcl_entity_extraction() {
755 let code = r#"
756region = "eu-west-1"
757
758variable "image_id" {
759 type = string
760}
761
762resource "aws_instance" "web" {
763 ami = var.image_id
764
765 lifecycle {
766 create_before_destroy = true
767 }
768}
769"#;
770 let plugin = CodeParserPlugin;
771 let entities = plugin.extract_entities(code, "main.tf");
772 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
773 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
774 eprintln!("HCL entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
775
776 assert!(names.contains(&"region"), "Should find top-level attribute, got: {:?}", names);
777 assert!(names.contains(&"variable.image_id"), "Should find variable block, got: {:?}", names);
778 assert!(names.contains(&"resource.aws_instance.web"), "Should find resource block, got: {:?}", names);
779 assert!(
780 names.contains(&"resource.aws_instance.web.lifecycle"),
781 "Should find nested lifecycle block with qualified name, got: {:?}",
782 names
783 );
784 assert!(!names.contains(&"ami"), "Should skip nested attributes inside blocks, got: {:?}", names);
785 assert!(
786 !names.contains(&"create_before_destroy"),
787 "Should skip nested attributes inside nested blocks, got: {:?}",
788 names
789 );
790
791 let lifecycle = entities
792 .iter()
793 .find(|e| e.name == "resource.aws_instance.web.lifecycle")
794 .unwrap();
795 assert!(lifecycle.parent_id.is_some(), "lifecycle should be nested under resource");
796 assert!(types.contains(&"attribute"), "Should preserve attribute entity type for top-level attributes");
797 }
798
799 #[test]
800 fn test_kotlin_entity_extraction() {
801 let code = r#"
802class UserService {
803 val name: String = ""
804
805 fun greet(): String {
806 return "Hello, $name"
807 }
808
809 companion object {
810 fun create(): UserService = UserService()
811 }
812}
813
814interface Repository {
815 fun findById(id: Int): Any?
816}
817
818object AppConfig {
819 val version = "1.0"
820}
821
822fun topLevel(x: Int): Int = x * 2
823"#;
824 let plugin = CodeParserPlugin;
825 let entities = plugin.extract_entities(code, "App.kt");
826 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
827 eprintln!("Kotlin entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
828 assert!(names.contains(&"UserService"), "got: {:?}", names);
829 assert!(names.contains(&"greet"), "got: {:?}", names);
830 assert!(names.contains(&"Repository"), "got: {:?}", names);
831 assert!(names.contains(&"findById"), "got: {:?}", names);
832 assert!(names.contains(&"AppConfig"), "got: {:?}", names);
833 assert!(names.contains(&"topLevel"), "got: {:?}", names);
834 }
835
836 #[test]
837 fn test_xml_entity_extraction() {
838 let code = r#"<?xml version="1.0" encoding="UTF-8"?>
839<project>
840 <groupId>com.example</groupId>
841 <artifactId>my-app</artifactId>
842 <dependencies>
843 <dependency>
844 <groupId>junit</groupId>
845 <artifactId>junit</artifactId>
846 </dependency>
847 </dependencies>
848 <build>
849 <plugins>
850 <plugin>
851 <groupId>org.apache.maven</groupId>
852 </plugin>
853 </plugins>
854 </build>
855</project>
856"#;
857 let plugin = CodeParserPlugin;
858 let entities = plugin.extract_entities(code, "pom.xml");
859 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
860 eprintln!("XML entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
861 assert!(names.contains(&"project"), "got: {:?}", names);
862 assert!(names.contains(&"dependencies"), "got: {:?}", names);
863 assert!(names.contains(&"build"), "got: {:?}", names);
864 }
865
866 #[test]
867 fn test_arrow_callback_scope_boundary_typescript() {
868 let code = r#"
872const activeQueues = [
873 { queue: queues.fooQueue, processor: foo.process },
874];
875
876activeQueues.forEach((handler: any) => {
877 const queue = handler.queue;
878 let retries = 0;
879
880 class QueueHandler {
881 handle() { return queue; }
882 }
883
884 function createHandler() {
885 return new QueueHandler();
886 }
887
888 queue.process((job) => {
889 const orderId = job.data.orderId;
890 return orderId;
891 });
892});
893
894function handleFailure(job: any, err: any) {
895 console.error('failed', err);
896}
897"#;
898 let plugin = CodeParserPlugin;
899 let entities = plugin.extract_entities(code, "process.ts");
900 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
901 let top_level: Vec<&str> = entities
902 .iter()
903 .filter(|e| e.parent_id.is_none())
904 .map(|e| e.name.as_str())
905 .collect();
906
907 assert!(top_level.contains(&"activeQueues"), "got: {:?}", top_level);
909 assert!(top_level.contains(&"handleFailure"), "got: {:?}", top_level);
910
911 assert!(names.contains(&"QueueHandler"), "got: {:?}", names);
913 assert!(names.contains(&"handle"), "got: {:?}", names);
914 assert!(names.contains(&"createHandler"), "got: {:?}", names);
915
916 assert!(!names.contains(&"queue"), "got: {:?}", names);
918 assert!(!names.contains(&"retries"), "got: {:?}", names);
919 assert!(!names.contains(&"orderId"), "got: {:?}", names);
920 }
921
922 #[test]
923 fn test_top_level_iife_wrapper_still_extracts_typescript_entities() {
924 let code = r#"
925function factory() {
926 class Foo {
927 method(): number {
928 return 1;
929 }
930 }
931
932 function bar(): Foo {
933 return new Foo();
934 }
935}
936
937factory();
938"#;
939 let plugin = CodeParserPlugin;
940 let entities = plugin.extract_entities(code, "wrapped.ts");
941 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
942 assert!(
943 names.contains(&"factory"),
944 "Should find top-level wrapper function, got: {:?}",
945 names
946 );
947 assert!(
948 names.contains(&"Foo"),
949 "Should find class inside top-level wrapper, got: {:?}",
950 names
951 );
952 assert!(
953 names.contains(&"bar"),
954 "Should find function inside top-level wrapper, got: {:?}",
955 names
956 );
957 }
958
959 #[test]
960 fn test_top_level_iife_still_extracts_typescript_entities() {
961 let code = r#"
962(() => {
963 class Foo {
964 method(): number {
965 return 1;
966 }
967 }
968
969 function bar(): Foo {
970 return new Foo();
971 }
972})();
973"#;
974 let plugin = CodeParserPlugin;
975 let entities = plugin.extract_entities(code, "iife.ts");
976 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
977 assert!(
978 names.contains(&"Foo"),
979 "Should find class inside top-level IIFE, got: {:?}",
980 names
981 );
982 assert!(
983 names.contains(&"bar"),
984 "Should find function inside top-level IIFE, got: {:?}",
985 names
986 );
987 }
988
989 #[test]
990 fn test_function_locals_not_extracted_as_nested_entities_typescript() {
991 let code = r#"
992export default function foo() {
993 const x = 1;
994 return x;
995}
996"#;
997 let plugin = CodeParserPlugin;
998 let entities = plugin.extract_entities(code, "default-export.ts");
999 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1000 assert!(
1001 names.contains(&"foo"),
1002 "Should find exported function, got: {:?}",
1003 names
1004 );
1005 assert!(
1006 !names.contains(&"x"),
1007 "Local inside function should not be extracted as an entity, got: {:?}",
1008 names
1009 );
1010 }
1011
1012 #[test]
1013 fn test_function_expression_scope_boundary_typescript() {
1014 let code = r#"
1017const foo = function namedExpr(x: number) {
1018 const inner = x + 1;
1019 return inner;
1020};
1021
1022const bar = function(y: number) {
1023 const local = y * 2;
1024 return local;
1025};
1026
1027const items = [1, 2, 3];
1028
1029items.forEach(function process(item) {
1030 const doubled = item * 2;
1031 console.log(doubled);
1032});
1033"#;
1034 let plugin = CodeParserPlugin;
1035 let entities = plugin.extract_entities(code, "funexpr.ts");
1036 let top_level: Vec<&str> = entities
1037 .iter()
1038 .filter(|e| e.parent_id.is_none())
1039 .map(|e| e.name.as_str())
1040 .collect();
1041 let find = |name: &str| entities.iter().find(|e| e.name == name).unwrap();
1042 let all_names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1043
1044 assert!(top_level.contains(&"foo"), "got: {:?}", top_level);
1047 assert!(top_level.contains(&"bar"), "got: {:?}", top_level);
1048 assert!(top_level.contains(&"items"), "got: {:?}", top_level);
1049 assert_eq!(find("foo").entity_type, "function");
1050 assert_eq!(find("bar").entity_type, "function");
1051 assert_eq!(find("items").entity_type, "variable");
1052
1053 assert!(!all_names.contains(&"inner"), "got: {:?}", all_names);
1055 assert!(!all_names.contains(&"local"), "got: {:?}", all_names);
1056 assert!(!all_names.contains(&"doubled"), "got: {:?}", all_names);
1057
1058 assert!(!top_level.contains(&"process"), "got: {:?}", top_level);
1060 }
1061
1062 #[test]
1063 fn test_variable_assigned_arrow_extracts_inner_entities() {
1064 let code = r#"
1067const handler = () => {
1068 class Inner {
1069 run() { return 1; }
1070 }
1071
1072 function make() {
1073 return new Inner();
1074 }
1075
1076 const local = 42;
1077};
1078"#;
1079 let plugin = CodeParserPlugin;
1080 let entities = plugin.extract_entities(code, "assigned.ts");
1081 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1082 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1083
1084 assert_eq!(handler.entity_type, "function");
1085 assert!(names.contains(&"handler"), "got: {:?}", names);
1086 assert!(names.contains(&"Inner"), "got: {:?}", names);
1087 assert!(names.contains(&"run"), "got: {:?}", names);
1088 assert!(names.contains(&"make"), "got: {:?}", names);
1089 assert!(!names.contains(&"local"), "got: {:?}", names);
1090 }
1091
1092 #[test]
1093 fn test_variable_assigned_function_expression_extracts_inner_entities() {
1094 let code = r#"
1096const handler = function() {
1097 class Inner {}
1098 function make() { return new Inner(); }
1099 const local = 42;
1100};
1101"#;
1102 let plugin = CodeParserPlugin;
1103 let entities = plugin.extract_entities(code, "funexpr-inner.ts");
1104 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1105 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1106
1107 assert_eq!(handler.entity_type, "function");
1108 assert!(names.contains(&"handler"), "got: {:?}", names);
1109 assert!(names.contains(&"Inner"), "got: {:?}", names);
1110 assert!(names.contains(&"make"), "got: {:?}", names);
1111 assert!(!names.contains(&"local"), "got: {:?}", names);
1112 }
1113
1114 #[test]
1115 fn test_let_assigned_arrow_stays_variable_typescript() {
1116 let code = r#"
1117let handler = () => {
1118 return 42;
1119};
1120"#;
1121 let plugin = CodeParserPlugin;
1122 let entities = plugin.extract_entities(code, "let-assigned.ts");
1123 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1124
1125 assert_eq!(handler.entity_type, "variable");
1126 }
1127
1128 #[test]
1129 fn test_const_assigned_arrow_promoted_to_function_javascript() {
1130 let code = r#"
1131const handler = () => {
1132 return 42;
1133};
1134"#;
1135 let plugin = CodeParserPlugin;
1136 let entities = plugin.extract_entities(code, "handler.js");
1137 let handler = entities.iter().find(|e| e.name == "handler").unwrap();
1138
1139 assert_eq!(handler.entity_type, "function");
1140 }
1141
1142 #[test]
1143 fn test_js_ts_multi_declarator_promotes_each_const_initializer() {
1144 let code = r#"
1145const value = 1, handler = () => value;
1146const first = () => 1, second = 2;
1147"#;
1148 let plugin = CodeParserPlugin;
1149 let entities = plugin.extract_entities(code, "sample.ts");
1150 let find = |name: &str| {
1151 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1152 panic!(
1153 "missing {name}; got: {:?}",
1154 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>()
1155 )
1156 })
1157 };
1158
1159 assert_eq!(find("value").entity_type, "variable");
1160 assert_eq!(find("handler").entity_type, "function");
1161 assert_eq!(find("first").entity_type, "function");
1162 assert_eq!(find("second").entity_type, "variable");
1163 }
1164
1165 #[test]
1166 fn test_suppressed_multi_declarator_traverses_skipped_initializers() {
1167 let code = r#"
1168function wrapper() {
1169 const holder = class {
1170 run() { return 1; }
1171 }, handler = () => {
1172 class Inner {
1173 go() { return 2; }
1174 }
1175 }, value = 1;
1176}
1177"#;
1178 let plugin = CodeParserPlugin;
1179 let entities = plugin.extract_entities(code, "sample.ts");
1180 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1181 let find = |name: &str| {
1182 entities.iter().find(|e| e.name == name).unwrap_or_else(|| {
1183 panic!(
1184 "missing {name}; got: {:?}",
1185 entities
1186 .iter()
1187 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1188 .collect::<Vec<_>>()
1189 )
1190 })
1191 };
1192
1193 assert_eq!(find("wrapper").entity_type, "function");
1194 assert_eq!(find("handler").entity_type, "function");
1195 assert!(names.contains(&"run"), "got: {:?}", names);
1196 assert!(names.contains(&"Inner"), "got: {:?}", names);
1197 assert!(names.contains(&"go"), "got: {:?}", names);
1198 assert!(!names.contains(&"holder"), "got: {:?}", names);
1199 assert!(!names.contains(&"value"), "got: {:?}", names);
1200 }
1201
1202 #[test]
1203 fn test_go_var_declaration() {
1204 let code = r#"package featuremgmt
1205
1206type FeatureFlag struct {
1207 Name string
1208 Description string
1209 Stage string
1210}
1211
1212var standardFeatureFlags = []FeatureFlag{
1213 {
1214 Name: "panelTitleSearch",
1215 Description: "Search for dashboards using panel title",
1216 Stage: "PublicPreview",
1217 },
1218}
1219
1220func GetFlags() []FeatureFlag {
1221 return standardFeatureFlags
1222}
1223"#;
1224 let plugin = CodeParserPlugin;
1225 let entities = plugin.extract_entities(code, "flags.go");
1226 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1227 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1228 eprintln!("Go entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1229
1230 assert!(names.contains(&"FeatureFlag"), "Should find type FeatureFlag, got: {:?}", names);
1231 assert!(names.contains(&"standardFeatureFlags"), "Should find var standardFeatureFlags, got: {:?}", names);
1232 assert!(names.contains(&"GetFlags"), "Should find func GetFlags, got: {:?}", names);
1233 }
1234
1235 #[test]
1236 fn test_go_grouped_var_declaration() {
1237 let code = r#"package test
1238
1239var (
1240 simple = 42
1241 flags = []string{"a", "b"}
1242)
1243
1244const (
1245 x = 1
1246 y = 2
1247)
1248
1249func main() {}
1250"#;
1251 let plugin = CodeParserPlugin;
1252 let entities = plugin.extract_entities(code, "test.go");
1253 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1254 let types: Vec<&str> = entities.iter().map(|e| e.entity_type.as_str()).collect();
1255 eprintln!("Go grouped entities: {:?}", names.iter().zip(types.iter()).collect::<Vec<_>>());
1256
1257 assert!(names.contains(&"flags") || names.contains(&"simple"), "Should find grouped var, got: {:?}", names);
1258 assert!(names.contains(&"x"), "Should find grouped const x, got: {:?}", names);
1259 assert!(names.contains(&"main"), "Should find func main, got: {:?}", names);
1260 }
1261
1262 #[test]
1263 fn test_dart_entity_extraction() {
1264 let code = r#"
1265import 'dart:math';
1266
1267class Calculator {
1268 final String name;
1269
1270 Calculator(this.name);
1271
1272 Calculator.withDefault() : name = 'default';
1273
1274 factory Calculator.create(String name) {
1275 return Calculator(name);
1276 }
1277
1278 int add(int a, int b) {
1279 return a + b;
1280 }
1281
1282 int get doubleAdd => add(1, 1) * 2;
1283
1284 set label(String value) {
1285 // no-op
1286 }
1287
1288 int operator +(Calculator other) {
1289 return 0;
1290 }
1291}
1292
1293mixin Loggable {
1294 void log(String message) {
1295 print(message);
1296 }
1297}
1298
1299extension StringExt on String {
1300 bool get isBlank => trim().isEmpty;
1301}
1302
1303enum Status {
1304 active,
1305 inactive;
1306
1307 String display() => name.toUpperCase();
1308}
1309
1310typedef Callback = void Function(int);
1311
1312int add(int a, int b) {
1313 return a + b;
1314}
1315
1316extension type Wrapper(int value) implements int {}
1317"#;
1318 let plugin = CodeParserPlugin;
1319 let entities = plugin.extract_entities(code, "calculator.dart");
1320 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1321 eprintln!(
1322 "Dart entities: {:?}",
1323 entities
1324 .iter()
1325 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1326 .collect::<Vec<_>>()
1327 );
1328
1329 assert!(names.contains(&"Calculator"), "Should find class, got: {:?}", names);
1331 assert!(names.contains(&"Loggable"), "Should find mixin, got: {:?}", names);
1332 assert!(names.contains(&"StringExt"), "Should find extension, got: {:?}", names);
1333 assert!(names.contains(&"Status"), "Should find enum, got: {:?}", names);
1334 assert!(names.contains(&"Callback"), "Should find typedef, got: {:?}", names);
1335 assert!(names.contains(&"add"), "Should find top-level function, got: {:?}", names);
1336 assert!(names.contains(&"Wrapper"), "Should find extension type, got: {:?}", names);
1337
1338 let add_method = entities.iter().find(|e| e.name == "add" && e.parent_id.is_some());
1340 assert!(add_method.is_some(), "Should find add method inside Calculator");
1341 assert_eq!(add_method.unwrap().entity_type, "method");
1342
1343 let unnamed_ctor = entities.iter().find(|e| e.name == "Calculator" && e.entity_type == "constructor");
1345 assert!(unnamed_ctor.is_some(), "Should find unnamed constructor");
1346 let named_ctor = entities.iter().find(|e| e.name == "Calculator.withDefault");
1347 assert!(named_ctor.is_some(), "Should find named constructor Calculator.withDefault, got: {:?}", names);
1348 assert_eq!(named_ctor.unwrap().entity_type, "constructor");
1349 assert_ne!(unnamed_ctor.unwrap().id, named_ctor.unwrap().id, "Named and unnamed constructors must have different entity IDs");
1350
1351 let factory_ctor = entities.iter().find(|e| e.name == "Calculator.create");
1353 assert!(factory_ctor.is_some(), "Should find factory constructor Calculator.create, got: {:?}", names);
1354 assert_eq!(factory_ctor.unwrap().entity_type, "constructor");
1355
1356 let getter = entities.iter().find(|e| e.name == "doubleAdd");
1358 assert!(getter.is_some(), "Should find getter doubleAdd");
1359 assert_eq!(getter.unwrap().entity_type, "getter");
1360
1361 let setter = entities.iter().find(|e| e.name == "label");
1362 assert!(setter.is_some(), "Should find setter label");
1363 assert_eq!(setter.unwrap().entity_type, "setter");
1364
1365 let operator = entities.iter().find(|e| e.name == "operator +");
1366 assert!(operator.is_some(), "Should find operator +");
1367 assert_eq!(operator.unwrap().entity_type, "method");
1368
1369 let log_method = entities.iter().find(|e| e.name == "log");
1371 assert!(log_method.is_some(), "Should find log in Loggable");
1372 assert!(log_method.unwrap().parent_id.is_some(), "log should have parent_id");
1373
1374 let callback = entities.iter().find(|e| e.name == "Callback").unwrap();
1376 assert_eq!(callback.entity_type, "type", "typedef should map to 'type'");
1377
1378 let loggable = entities.iter().find(|e| e.name == "Loggable").unwrap();
1379 assert_eq!(loggable.entity_type, "mixin");
1380
1381 let ext = entities.iter().find(|e| e.name == "StringExt").unwrap();
1382 assert_eq!(ext.entity_type, "extension");
1383
1384 let wrapper = entities.iter().find(|e| e.name == "Wrapper").unwrap();
1385 assert_eq!(wrapper.entity_type, "extension");
1386 }
1387
1388 #[test]
1389 fn test_dart_top_level_function_includes_body() {
1390 let code = r#"
1391int add(int a, int b) {
1392 return a + b;
1393}
1394
1395String greet(String name) => 'Hello, $name!';
1396"#;
1397 let plugin = CodeParserPlugin;
1398 let entities = plugin.extract_entities(code, "funcs.dart");
1399 eprintln!(
1400 "Dart top-level: {:?}",
1401 entities
1402 .iter()
1403 .map(|e| (&e.name, &e.entity_type, &e.content))
1404 .collect::<Vec<_>>()
1405 );
1406
1407 let add_fn = entities.iter().find(|e| e.name == "add").unwrap();
1408 assert!(
1409 add_fn.content.contains("return a + b"),
1410 "Top-level function content should include the body, got: {:?}",
1411 add_fn.content
1412 );
1413
1414 let greet_fn = entities.iter().find(|e| e.name == "greet").unwrap();
1415 assert!(
1416 greet_fn.content.contains("Hello"),
1417 "Expression body should be included, got: {:?}",
1418 greet_fn.content
1419 );
1420
1421 let code_v2 = r#"
1423int add(int a, int b) {
1424 return a * b;
1425}
1426
1427String greet(String name) => 'Hello, $name!';
1428"#;
1429 let entities_v2 = plugin.extract_entities(code_v2, "funcs.dart");
1430 let add_v2 = entities_v2.iter().find(|e| e.name == "add").unwrap();
1431 assert_ne!(
1432 add_fn.content_hash, add_v2.content_hash,
1433 "Body change should produce different content_hash"
1434 );
1435
1436 let greet_v2 = entities_v2.iter().find(|e| e.name == "greet").unwrap();
1438 assert_eq!(
1439 greet_fn.content_hash, greet_v2.content_hash,
1440 "Unchanged function should keep the same content_hash"
1441 );
1442 }
1443
1444 #[test]
1445 fn test_dart_renamed_named_constructor_same_structural_hash() {
1446 let code_a = r#"
1447class Foo {
1448 Foo.fromJson(Map<String, dynamic> json) {
1449 print(json);
1450 }
1451}
1452"#;
1453 let code_b = r#"
1454class Foo {
1455 Foo.fromMap(Map<String, dynamic> json) {
1456 print(json);
1457 }
1458}
1459"#;
1460 let plugin = CodeParserPlugin;
1461 let entities_a = plugin.extract_entities(code_a, "a.dart");
1462 let entities_b = plugin.extract_entities(code_b, "b.dart");
1463
1464 let ctor_a = entities_a.iter().find(|e| e.name == "Foo.fromJson").unwrap();
1465 let ctor_b = entities_b.iter().find(|e| e.name == "Foo.fromMap").unwrap();
1466
1467 assert_eq!(
1468 ctor_a.structural_hash, ctor_b.structural_hash,
1469 "Renamed named constructor with identical body should have same structural_hash"
1470 );
1471 assert_ne!(
1472 ctor_a.content_hash, ctor_b.content_hash,
1473 "Content hash should differ since raw content includes the name"
1474 );
1475 }
1476
1477 #[test]
1478 fn test_dart_top_level_getter_setter() {
1479 let code = r#"
1480int _value = 0;
1481
1482int get currentValue {
1483 return _value;
1484}
1485
1486set currentValue(int v) {
1487 _value = v;
1488}
1489"#;
1490 let plugin = CodeParserPlugin;
1491 let entities = plugin.extract_entities(code, "accessors.dart");
1492 eprintln!(
1493 "Dart top-level accessors: {:?}",
1494 entities
1495 .iter()
1496 .map(|e| (&e.name, &e.entity_type, &e.content))
1497 .collect::<Vec<_>>()
1498 );
1499
1500 let getter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "getter");
1501 assert!(getter.is_some(), "Should find top-level getter, got: {:?}",
1502 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1503 assert!(
1504 getter.unwrap().content.contains("return _value"),
1505 "Top-level getter content should include the body"
1506 );
1507 assert!(getter.unwrap().parent_id.is_none(), "Top-level getter should have no parent");
1508
1509 let setter = entities.iter().find(|e| e.name == "currentValue" && e.entity_type == "function");
1513 assert!(setter.is_some(), "Should find top-level setter as function, got: {:?}",
1514 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1515 assert!(
1516 setter.unwrap().content.contains("_value = v"),
1517 "Top-level setter content should include the body"
1518 );
1519 }
1520
1521 #[test]
1522 fn test_dart_field_entity_type() {
1523 let code = r#"
1524class Config {
1525 final String name;
1526 static const int maxRetries = 3;
1527}
1528"#;
1529 let plugin = CodeParserPlugin;
1530 let entities = plugin.extract_entities(code, "config.dart");
1531 eprintln!(
1532 "Dart fields: {:?}",
1533 entities
1534 .iter()
1535 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1536 .collect::<Vec<_>>()
1537 );
1538
1539 let name_field = entities.iter().find(|e| e.name == "name" && e.parent_id.is_some());
1540 assert!(name_field.is_some(), "Should find field 'name', got: {:?}",
1541 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1542 assert_eq!(name_field.unwrap().entity_type, "field");
1543
1544 let max_retries = entities.iter().find(|e| e.name == "maxRetries");
1545 assert!(max_retries.is_some(), "Should find field 'maxRetries', got: {:?}",
1546 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1547 assert_eq!(max_retries.unwrap().entity_type, "field");
1548 }
1549
1550 #[test]
1551 fn test_dart_identifier_list_fields() {
1552 let code = r#"
1556abstract class Shape {
1557 abstract double x, y;
1558 abstract String label;
1559}
1560"#;
1561 let plugin = CodeParserPlugin;
1562 let entities = plugin.extract_entities(code, "shape.dart");
1563 eprintln!(
1564 "Dart identifier_list fields: {:?}",
1565 entities
1566 .iter()
1567 .map(|e| (&e.name, &e.entity_type, &e.parent_id))
1568 .collect::<Vec<_>>()
1569 );
1570
1571 let x_field = entities.iter().find(|e| e.name == "x");
1572 assert!(x_field.is_some(), "Should find field 'x' from identifier_list, got: {:?}",
1573 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1574 assert_eq!(x_field.unwrap().entity_type, "field");
1575 assert!(x_field.unwrap().parent_id.is_some(), "field 'x' should be nested under Shape");
1576
1577 let label_field = entities.iter().find(|e| e.name == "label");
1578 assert!(label_field.is_some(), "Should find field 'label' from single-element identifier_list, got: {:?}",
1579 entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1580 assert_eq!(label_field.unwrap().entity_type, "field");
1581 }
1582
1583 #[test]
1584 fn test_ocaml_entity_extraction() {
1585 let code = r#"
1586type color = Red | Green | Blue
1587
1588type point = {
1589 x : float;
1590 y : float;
1591}
1592
1593exception Not_found of string
1594
1595let greet name =
1596 Printf.printf "Hello, %s!\n" name
1597
1598let add a b = a + b
1599
1600let version = "1.0"
1601
1602let color_to_string = function
1603 | Red -> "red"
1604 | Blue -> "blue"
1605
1606let inc = fun x -> x + 1
1607
1608module MyModule = struct
1609 let helper x = x * 2
1610end
1611
1612module type Printable = sig
1613 val to_string : 'a -> string
1614end
1615
1616external caml_input : in_channel -> bytes -> int -> int -> int = "caml_input"
1617
1618class point_class x_init = object
1619 val mutable x = x_init
1620 method get_x = x
1621end
1622
1623class type measurable = object
1624 method measure : float
1625end
1626"#;
1627 let plugin = CodeParserPlugin;
1628 let entities = plugin.extract_entities(code, "example.ml");
1629 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1630 eprintln!("OCaml entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1631
1632 let find = |name: &str| entities.iter().find(|e| e.name == name)
1633 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1634
1635 assert_eq!(find("color").entity_type, "type");
1636 assert_eq!(find("point").entity_type, "type");
1637 assert_eq!(find("Not_found").entity_type, "exception");
1638 assert_eq!(find("greet").entity_type, "function");
1639 assert_eq!(find("add").entity_type, "function");
1640 assert_eq!(find("version").entity_type, "value");
1641 assert_eq!(find("color_to_string").entity_type, "function");
1642 assert_eq!(find("inc").entity_type, "function");
1643 assert_eq!(find("MyModule").entity_type, "module");
1644 assert_eq!(find("Printable").entity_type, "module_type");
1645 assert_eq!(find("caml_input").entity_type, "external");
1646 assert_eq!(find("point_class").entity_type, "class");
1647 assert_eq!(find("measurable").entity_type, "class_type");
1648 }
1649
1650 #[test]
1651 fn test_ocaml_nested_module_entities() {
1652 let code = r#"
1653module Outer = struct
1654 let x = 42
1655
1656 module Inner = struct
1657 let y = 0
1658 end
1659end
1660"#;
1661 let plugin = CodeParserPlugin;
1662 let entities = plugin.extract_entities(code, "nested.ml");
1663 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1664 eprintln!("OCaml nested: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1665
1666 let find = |name: &str| entities.iter().find(|e| e.name == name)
1667 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1668
1669 let outer = find("Outer");
1670 let x = find("x");
1671 let inner = find("Inner");
1672 let y = find("y");
1673
1674 assert_eq!(outer.entity_type, "module");
1675 assert_eq!(x.entity_type, "value");
1676 assert_eq!(inner.entity_type, "module");
1677 assert_eq!(y.entity_type, "value");
1678
1679 assert!(x.parent_id.as_ref().is_some_and(|p| p == &outer.id), "x should be nested under Outer");
1680 assert!(inner.parent_id.as_ref().is_some_and(|p| p == &outer.id), "Inner should be nested under Outer");
1681 assert!(y.parent_id.as_ref().is_some_and(|p| p == &inner.id), "y should be nested under Inner");
1682 }
1683
1684 #[test]
1685 fn test_ocaml_interface_entity_extraction() {
1686 let code = r#"
1687type t
1688
1689val create : string -> t
1690val to_string : t -> string
1691
1692exception Invalid_input of string
1693
1694module type Serializable = sig
1695 val serialize : t -> string
1696end
1697"#;
1698 let plugin = CodeParserPlugin;
1699 let entities = plugin.extract_entities(code, "example.mli");
1700 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1701 eprintln!("OCaml interface entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1702
1703 let find = |name: &str| entities.iter().find(|e| e.name == name)
1704 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1705
1706 assert_eq!(find("t").entity_type, "type");
1707 assert_eq!(find("create").entity_type, "val");
1708 assert_eq!(find("to_string").entity_type, "val");
1709 assert_eq!(find("Invalid_input").entity_type, "exception");
1710 assert_eq!(find("Serializable").entity_type, "module_type");
1711 }
1712
1713 #[test]
1714 fn test_ocaml_mutual_recursion_let() {
1715 let code = r#"
1716let rec even n = (n = 0) || odd (n - 1)
1717and odd n = (n <> 0) && even (n - 1)
1718
1719let rec ping x = pong (x - 1)
1720and pong x = if x <= 0 then 0 else ping (x - 1)
1721"#;
1722 let plugin = CodeParserPlugin;
1723 let entities = plugin.extract_entities(code, "mutual.ml");
1724 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1725 eprintln!("OCaml mutual let: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1726
1727 let find = |name: &str| entities.iter().find(|e| e.name == name)
1728 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1729
1730 assert_eq!(find("even").entity_type, "function");
1731 assert_eq!(find("odd").entity_type, "function");
1732 assert_eq!(find("ping").entity_type, "function");
1733 assert_eq!(find("pong").entity_type, "function");
1734 }
1735
1736 #[test]
1737 fn test_ocaml_mutual_recursion_module() {
1738 let code = r#"
1739module rec A : sig val x : int end = struct
1740 let x = B.y + 1
1741end
1742and B : sig val y : int end = struct
1743 let y = 0
1744end
1745"#;
1746 let plugin = CodeParserPlugin;
1747 let entities = plugin.extract_entities(code, "mutual_mod.ml");
1748 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1749 eprintln!("OCaml mutual module: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type, &e.parent_id)).collect::<Vec<_>>());
1750
1751 let find = |name: &str| entities.iter().find(|e| e.name == name)
1752 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1753
1754 let a = find("A");
1755 let b = find("B");
1756 assert_eq!(a.entity_type, "module");
1757 assert_eq!(b.entity_type, "module");
1758
1759 let x = find("x");
1760 let y = find("y");
1761 assert!(x.parent_id.as_ref().is_some_and(|p| p == &a.id), "x should be nested under A");
1762 assert!(y.parent_id.as_ref().is_some_and(|p| p == &b.id), "y should be nested under B");
1763 }
1764
1765 #[test]
1766 fn test_ocaml_destructured_let() {
1767 let code = r#"
1768let (a, b) = (1, 2)
1769
1770let { x; y } = point
1771
1772let simple = 42
1773"#;
1774 let plugin = CodeParserPlugin;
1775 let entities = plugin.extract_entities(code, "destruct.ml");
1776 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1777 eprintln!("OCaml destructured: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1778
1779 let find = |name: &str| entities.iter().find(|e| e.name == name)
1780 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1781
1782 assert_eq!(find("a").entity_type, "value");
1783 assert_eq!(find("b").entity_type, "value");
1784 assert_eq!(find("x").entity_type, "value");
1785 assert_eq!(find("y").entity_type, "value");
1786 assert_eq!(find("simple").entity_type, "value");
1787 }
1788
1789 #[test]
1790 fn test_ocaml_mutual_recursion_class() {
1791 let code = r#"
1792class foo = object
1793 method x = 1
1794end
1795and bar = object
1796 method y = 2
1797end
1798"#;
1799 let plugin = CodeParserPlugin;
1800 let entities = plugin.extract_entities(code, "classes.ml");
1801 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1802 eprintln!("OCaml mutual class: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1803
1804 let find = |name: &str| entities.iter().find(|e| e.name == name)
1805 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1806
1807 assert_eq!(find("foo").entity_type, "class");
1808 assert_eq!(find("bar").entity_type, "class");
1809 }
1810
1811 #[test]
1812 fn test_perl_entity_extraction() {
1813 let code = r#"package Foo::Bar;
1814
1815use strict;
1816use warnings;
1817
1818sub hello {
1819 my ($self, $name) = @_;
1820 print "Hello, $name!\n";
1821}
1822
1823sub _private_helper {
1824 return 42;
1825}
1826
18271;
1828"#;
1829 let plugin = CodeParserPlugin;
1830 let entities = plugin.extract_entities(code, "Foo/Bar.pm");
1831 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1832
1833 assert!(names.contains(&"Foo::Bar"), "got: {:?}", names);
1834 assert!(names.contains(&"hello"), "got: {:?}", names);
1835 assert!(names.contains(&"_private_helper"), "got: {:?}", names);
1836
1837 let find = |name: &str| entities.iter().find(|e| e.name == name)
1838 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1839
1840 assert_eq!(find("Foo::Bar").entity_type, "package");
1841 assert_eq!(find("hello").entity_type, "function");
1842 assert_eq!(find("_private_helper").entity_type, "function");
1843 }
1844
1845 #[test]
1846 fn test_fortran_entity_extraction() {
1847 let code = r#"module math_utils
1848 implicit none
1849contains
1850 function add(a, b) result(c)
1851 integer, intent(in) :: a, b
1852 integer :: c
1853 c = a + b
1854 end function add
1855
1856 subroutine greet()
1857 print *, "hello"
1858 end subroutine greet
1859end module math_utils
1860
1861program main
1862 implicit none
1863 print *, "hello"
1864end program main
1865"#;
1866 let plugin = CodeParserPlugin;
1867 let entities = plugin.extract_entities(code, "test.f90");
1868 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1869
1870 assert!(names.contains(&"math_utils"), "got: {:?}", names);
1871 assert!(names.contains(&"add"), "got: {:?}", names);
1872 assert!(names.contains(&"greet"), "got: {:?}", names);
1873 assert!(names.contains(&"main"), "got: {:?}", names);
1874
1875 let find = |name: &str| entities.iter().find(|e| e.name == name)
1876 .unwrap_or_else(|| panic!("Should find {}, got: {:?}", name, names));
1877
1878 assert_eq!(find("math_utils").entity_type, "module");
1879 assert_eq!(find("add").entity_type, "function");
1880 assert_eq!(find("greet").entity_type, "subroutine");
1881 assert_eq!(find("main").entity_type, "program");
1882
1883 assert!(find("add").parent_id.is_some());
1885 assert!(find("greet").parent_id.is_some());
1886 }
1887
1888 #[test]
1889 fn test_scala_entity_extraction() {
1890 let code = r#"
1891package com.example
1892
1893import scala.collection.mutable
1894
1895class UserService(val name: String) {
1896 def getUsers(): List[User] = db.findAll()
1897
1898 def createUser(user: User): Unit = db.save(user)
1899
1900 private def validate(user: User): Boolean = true
1901}
1902
1903object UserService {
1904 def apply(name: String): UserService = new UserService(name)
1905
1906 val DefaultName: String = "default"
1907}
1908
1909trait Repository[T] {
1910 def findById(id: String): Option[T]
1911 def findAll(): List[T]
1912}
1913
1914case class User(id: String, name: String)
1915
1916type UserId = String
1917"#;
1918 let plugin = CodeParserPlugin;
1919 let entities = plugin.extract_entities(code, "UserService.scala");
1920 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1921 eprintln!("Scala entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1922
1923 assert!(names.contains(&"UserService"), "Should find class UserService, got: {:?}", names);
1924 assert!(names.contains(&"Repository"), "Should find trait Repository, got: {:?}", names);
1925 assert!(names.contains(&"getUsers"), "Should find method getUsers, got: {:?}", names);
1926 assert!(names.contains(&"createUser"), "Should find method createUser, got: {:?}", names);
1927
1928 let get_users = entities.iter().find(|e| e.name == "getUsers").unwrap();
1930 assert!(get_users.parent_id.is_some(), "getUsers should have parent_id");
1931 }
1932
1933 #[test]
1934 fn test_scala3_entity_extraction() {
1935 let code = r#"
1936package com.example
1937
1938enum Color:
1939 case Red, Green, Blue
1940
1941enum Planet(mass: Double, radius: Double):
1942 case Mercury extends Planet(3.303e+23, 2.4397e6)
1943 case Venus extends Planet(4.869e+24, 6.0518e6)
1944
1945object Main:
1946 def main(args: Array[String]): Unit =
1947 println("Hello, World!")
1948
1949trait Greeter:
1950 def greet(name: String): String
1951
1952given Greeter with
1953 def greet(name: String): String = s"Hello, $name!"
1954
1955extension (s: String)
1956 def shout: String = s.toUpperCase + "!"
1957
1958type Predicate[A] = A => Boolean
1959"#;
1960 let plugin = CodeParserPlugin;
1961 let entities = plugin.extract_entities(code, "Main.scala");
1962 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
1963 eprintln!("Scala 3 entities: {:?}", entities.iter().map(|e| (&e.name, &e.entity_type)).collect::<Vec<_>>());
1964
1965 assert!(names.contains(&"Color"), "Should find enum Color, got: {:?}", names);
1966 assert!(names.contains(&"Planet"), "Should find enum Planet, got: {:?}", names);
1967 assert!(names.contains(&"Main"), "Should find object Main, got: {:?}", names);
1968 assert!(names.contains(&"Greeter"), "Should find trait Greeter, got: {:?}", names);
1969 assert!(names.contains(&"Predicate"), "Should find type alias Predicate, got: {:?}", names);
1970 }
1971
1972 #[test]
1973 fn test_zig_entity_extraction() {
1974 let code = r#"
1975const std = @import("std");
1976
1977pub const Point = struct {
1978 x: i32,
1979 y: i32,
1980};
1981
1982pub const Color = enum {
1983 red,
1984 green,
1985 blue,
1986};
1987
1988const Person = struct {
1989 name: []const u8,
1990 age: u32,
1991};
1992
1993pub fn greet(name: []const u8) void {
1994 std.debug.print("Hello, {s}!\n", .{name});
1995}
1996
1997fn add(a: i32, b: i32) i32 {
1998 return a + b;
1999}
2000
2001pub fn main() !void {
2002 greet("world");
2003}
2004
2005test "basic addition" {
2006 const result = add(2, 3);
2007 _ = result;
2008}
2009"#;
2010 let plugin = CodeParserPlugin;
2011 let entities = plugin.extract_entities(code, "main.zig");
2012 let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect();
2013 let types: std::collections::HashMap<&str, &str> = entities
2014 .iter()
2015 .map(|e| (e.name.as_str(), e.entity_type.as_str()))
2016 .collect();
2017
2018 assert!(names.contains(&"greet"), "Should find greet, got: {:?}", names);
2019 assert!(names.contains(&"add"), "Should find add, got: {:?}", names);
2020 assert!(names.contains(&"main"), "Should find main, got: {:?}", names);
2021 assert!(names.contains(&"Point"), "Should find Point, got: {:?}", names);
2022 assert!(names.contains(&"Color"), "Should find Color, got: {:?}", names);
2023 assert!(names.contains(&"Person"), "Should find Person, got: {:?}", names);
2024
2025 assert_eq!(types["greet"], "function");
2026 assert_eq!(types["add"], "function");
2027 assert_eq!(types["Point"], "struct");
2028 assert_eq!(types["Color"], "enum");
2029 assert_eq!(types["Person"], "struct");
2030 }
2031}