// sem_core/parser/registry.rs

1use std::collections::HashMap;
2use std::path::Path;
3#[cfg(feature = "parallel")]
4use rayon::prelude::*;
5
6use crate::model::entity::{build_entity_id, SemanticEntity};
7
/// Iterate over `$items` with rayon's `par_iter()` when the `parallel` feature
/// is enabled, and with the ordinary sequential `iter()` otherwise.
macro_rules! maybe_par_iter {
    ($items:expr) => {{
        #[cfg(feature = "parallel")]
        {
            $items.par_iter()
        }
        #[cfg(not(feature = "parallel"))]
        {
            $items.iter()
        }
    }};
}
16use super::plugin::SemanticParserPlugin;
17
18pub struct ParserRegistry {
19    plugins: Vec<Box<dyn SemanticParserPlugin>>,
20    extension_map: HashMap<String, usize>, // ext → index into plugins
21    custom_ext_canonical: HashMap<String, String>, // ".mypy" → ".py" (custom → canonical)
22}
23
24impl ParserRegistry {
25    pub fn new() -> Self {
26        Self {
27            plugins: Vec::new(),
28            extension_map: HashMap::new(),
29            custom_ext_canonical: HashMap::new(),
30        }
31    }
32
33    pub fn register(&mut self, plugin: Box<dyn SemanticParserPlugin>) {
34        let idx = self.plugins.len();
35        for ext in plugin.extensions() {
36            self.extension_map.insert(ext.to_string(), idx);
37        }
38        self.plugins.push(plugin);
39    }
40
41    pub fn get_plugin(&self, file_path: &str) -> Option<&dyn SemanticParserPlugin> {
42        for ext in get_extensions(file_path) {
43            if let Some(&idx) = self.extension_map.get(&ext) {
44                return Some(self.plugins[idx].as_ref());
45            }
46        }
47        // Fallback plugin
48        self.get_plugin_by_id("fallback")
49    }
50
51    pub fn get_explicit_plugin(&self, file_path: &str) -> Option<&dyn SemanticParserPlugin> {
52        for ext in get_extensions(file_path) {
53            if let Some(&idx) = self.extension_map.get(&ext) {
54                return Some(self.plugins[idx].as_ref());
55            }
56        }
57        None
58    }
59
60    pub fn detect_plugin_from_content(&self, content: &str) -> Option<&dyn SemanticParserPlugin> {
61        self.detect_from_shebang(content)
62    }
63
64    /// Try to detect language from shebang line when extension-based lookup fails.
65    /// Call this as a fallback when file content is available.
66    pub fn get_plugin_with_content(&self, file_path: &str, content: &str) -> Option<&dyn SemanticParserPlugin> {
67        // Try extension first
68        for ext in get_extensions(file_path) {
69            if let Some(&idx) = self.extension_map.get(&ext) {
70                return Some(self.plugins[idx].as_ref());
71            }
72        }
73        // Try shebang detection
74        if let Some(plugin) = self.detect_from_shebang(content) {
75            return Some(plugin);
76        }
77        // Fallback plugin
78        self.get_plugin_by_id("fallback")
79    }
80
81    fn detect_from_shebang(&self, content: &str) -> Option<&dyn SemanticParserPlugin> {
82        if let Some(ext) = detect_ext_from_content(content) {
83            if let Some(&idx) = self.extension_map.get(ext.as_str()) {
84                return Some(self.plugins[idx].as_ref());
85            }
86        }
87        None
88    }
89
90    pub fn get_plugin_by_id(&self, id: &str) -> Option<&dyn SemanticParserPlugin> {
91        self.plugins
92            .iter()
93            .find(|p| p.id() == id)
94            .map(|p| p.as_ref())
95    }
96
97    /// Register a custom extension mapping from a .semrc file.
98    /// Maps an extension (e.g. ".inc") to an existing plugin by language name.
99    pub fn add_extension_mapping(&mut self, ext: &str, language: &str) -> bool {
100        let ext = if ext.starts_with('.') {
101            ext.to_lowercase()
102        } else {
103            format!(".{}", ext.to_lowercase())
104        };
105
106        // Find plugin index by matching language name against known extensions
107        let target_ext = LANG_MAPPING
108            .iter()
109            .find(|(kw, _)| *kw == language.to_lowercase())
110            .map(|(_, e)| *e);
111
112        if let Some(target) = target_ext {
113            if let Some(&idx) = self.extension_map.get(target) {
114                self.custom_ext_canonical.insert(ext.clone(), target.to_string());
115                self.extension_map.insert(ext, idx);
116                return true;
117            }
118        }
119
120        // Also try matching directly against registered extensions
121        let direct_ext = format!(".{}", language.to_lowercase());
122        if let Some(&idx) = self.extension_map.get(&direct_ext) {
123            self.custom_ext_canonical.insert(ext.clone(), direct_ext);
124            self.extension_map.insert(ext, idx);
125            return true;
126        }
127
128        false
129    }
130
131    /// Load extension mappings from a .semrc file at the given root directory.
132    /// File format (one mapping per line): `.ext = language`
133    /// Example:
134    ///   .inc = php
135    ///   .j = json
136    ///   .xyz = cpp
137    pub fn load_semrc(&mut self, root: &Path) {
138        let semrc_path = root.join(".semrc");
139        if !semrc_path.exists() {
140            return;
141        }
142        let content = match std::fs::read_to_string(&semrc_path) {
143            Ok(c) => c,
144            Err(_) => return,
145        };
146        for line in content.lines() {
147            let line = line.trim();
148            if line.is_empty() || line.starts_with('#') {
149                continue;
150            }
151            if let Some((ext, lang)) = line.split_once('=') {
152                self.add_extension_mapping(ext.trim(), lang.trim());
153            }
154        }
155    }
156
157    /// Load extension mappings from `.gitattributes` at the given root directory.
158    /// Parses `*.ext diff=language` and `*.ext linguist-language=Language` patterns.
159    /// Only processes `*.ext` glob patterns (not path-based patterns).
160    pub fn load_gitattributes(&mut self, root: &Path) {
161        let ga_path = root.join(".gitattributes");
162        if !ga_path.exists() {
163            return;
164        }
165        let content = match std::fs::read_to_string(&ga_path) {
166            Ok(c) => c,
167            Err(_) => return,
168        };
169        for line in content.lines() {
170            let line = line.trim();
171            if line.is_empty() || line.starts_with('#') {
172                continue;
173            }
174            let mut parts = line.split_whitespace();
175            let pattern = match parts.next() {
176                Some(p) => p,
177                None => continue,
178            };
179            // Only handle *.ext patterns
180            let ext = match pattern.strip_prefix("*.") {
181                Some(e) => e,
182                None => continue,
183            };
184            // Already mapped (e.g. by .semrc which takes priority)
185            let ext_key = format!(".{}", ext.to_lowercase());
186            if self.custom_ext_canonical.contains_key(&ext_key) {
187                continue;
188            }
189            // Look for diff= or linguist-language= attributes
190            for attr in parts {
191                if let Some(lang) = attr.strip_prefix("diff=") {
192                    self.add_extension_mapping(ext, lang);
193                    break;
194                }
195                if let Some(lang) = attr.strip_prefix("linguist-language=") {
196                    self.add_extension_mapping(ext, lang);
197                    break;
198                }
199            }
200        }
201    }
202
203    /// Resolve custom extension mappings in a file path.
204    /// E.g. if `.mypy` is mapped to `python` (canonical `.py`),
205    /// `"utils.mypy"` becomes `"utils.py"`.
206    pub fn resolve_file_path(&self, file_path: &str) -> Option<String> {
207        let path = Path::new(file_path);
208        let ext = path
209            .extension()
210            .and_then(|e| e.to_str())
211            .map(|e| format!(".{}", e.to_lowercase()))?;
212
213        let canonical = self.custom_ext_canonical.get(&ext)?;
214        let stem = path.file_stem().and_then(|s| s.to_str())?;
215
216        if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
217            Some(format!("{}/{}{}", parent.display(), stem, canonical))
218        } else {
219            Some(format!("{}{}", stem, canonical))
220        }
221    }
222
223    /// Extract entities, transparently handling custom extension mappings.
224    /// Uses the resolved path for language detection but restores the original
225    /// file path in entity metadata (file_path, id, parent_id).
226    pub fn extract_entities(&self, file_path: &str, content: &str) -> Vec<SemanticEntity> {
227        let resolved = self.resolve_file_path(file_path);
228        let detection_path = resolved.as_deref().unwrap_or(file_path);
229
230        let plugin = match self.get_plugin_with_content(detection_path, content) {
231            Some(p) => p,
232            None => return Vec::new(),
233        };
234
235        let mut entities = plugin.extract_entities(content, detection_path);
236        if let Some(ref rp) = resolved {
237            fix_entity_paths(&mut entities, file_path, rp);
238        }
239        entities
240    }
241
242    /// Extract entities with tree, transparently handling custom extension mappings.
243    pub fn extract_entities_with_tree(
244        &self,
245        file_path: &str,
246        content: &str,
247    ) -> Option<(Vec<SemanticEntity>, Option<tree_sitter::Tree>)> {
248        let resolved = self.resolve_file_path(file_path);
249        let detection_path = resolved.as_deref().unwrap_or(file_path);
250
251        let plugin = self.get_plugin_with_content(detection_path, content)?;
252        let (mut entities, tree) = plugin.extract_entities_with_tree(content, detection_path);
253        if let Some(ref rp) = resolved {
254            fix_entity_paths(&mut entities, file_path, rp);
255        }
256        Some((entities, tree))
257    }
258
259    /// Extract entities from multiple files in parallel.
260    pub fn extract_all_entities(
261        &self,
262        root: &Path,
263        file_paths: &[String],
264    ) -> Vec<SemanticEntity> {
265        let mut entities: Vec<SemanticEntity> = maybe_par_iter!(file_paths)
266            .flat_map(|fp| {
267                let full = root.join(fp);
268                let content = match std::fs::read_to_string(&full) {
269                    Ok(c) => c,
270                    Err(_) => return Vec::new(),
271                };
272                self.extract_entities(fp, &content)
273            })
274            .collect();
275        resolve_go_method_parent_ids(&mut entities);
276        entities
277    }
278}
279
280pub fn resolve_go_method_parent_ids(entities: &mut [SemanticEntity]) {
281    let mut types_by_package: HashMap<(String, String, String), String> = HashMap::new();
282
283    for entity in entities.iter() {
284        if !is_go_file(&entity.file_path) || !is_go_receiver_type_entity(entity) {
285            continue;
286        }
287
288        let package_name = go_package_name(entity).unwrap_or("");
289
290        types_by_package
291            .entry((
292                go_package_dir(&entity.file_path).to_string(),
293                package_name.to_string(),
294                entity.name.clone(),
295            ))
296            .or_insert_with(|| entity.id.clone());
297    }
298
299    for entity in entities.iter_mut() {
300        if !is_go_file(&entity.file_path) || entity.entity_type != "method" {
301            continue;
302        }
303
304        let package_name = go_package_name(entity).unwrap_or("");
305        let Some(receiver_name) = extract_go_receiver_type_name(&entity.content) else {
306            continue;
307        };
308
309        let key = (
310            go_package_dir(&entity.file_path).to_string(),
311            package_name.to_string(),
312            receiver_name,
313        );
314
315        let Some(parent_id) = types_by_package.get(&key) else {
316            continue;
317        };
318
319        if entity.parent_id.as_deref() == Some(parent_id.as_str()) {
320            continue;
321        }
322
323        entity.parent_id = Some(parent_id.clone());
324        entity.id = build_entity_id(
325            &entity.file_path,
326            &entity.entity_type,
327            &entity.name,
328            Some(parent_id),
329        );
330    }
331}
332
/// Report whether `file_path` ends in `.go`, ignoring ASCII case.
///
/// Extension lookup in this file lower-cases file names, so `Main.GO` is
/// handed to the same plugin as `main.go`; this check accepts the same
/// spellings so such files are not skipped by Go-specific post-processing.
fn is_go_file(file_path: &str) -> bool {
    let bytes = file_path.as_bytes();
    bytes
        .len()
        .checked_sub(3)
        .map_or(false, |start| bytes[start..].eq_ignore_ascii_case(b".go"))
}
336
337fn is_go_receiver_type_entity(entity: &SemanticEntity) -> bool {
338    matches!(
339        entity.entity_type.as_str(),
340        "type" | "struct" | "class" | "interface"
341    )
342}
343
344fn go_package_name(entity: &SemanticEntity) -> Option<&str> {
345    entity
346        .metadata
347        .as_ref()
348        .and_then(|metadata| metadata.get("go.package"))
349        .map(String::as_str)
350}
351
/// Return everything before the last `/` of `file_path`, or `""` when the
/// path contains no `/`.
fn go_package_dir(file_path: &str) -> &str {
    match file_path.rfind('/') {
        Some(slash) => &file_path[..slash],
        None => "",
    }
}
355
/// Extract the receiver's base type name from the source text of a Go method.
///
/// `content` must start (after optional whitespace) with `func (` followed by
/// the receiver, e.g. `func (s *Service) Run()`. The result is stripped of
/// the receiver variable name, pointer stars, any type-parameter list and any
/// package qualifier, so `m *Map[K, V]` yields `"Map"`.
///
/// Returns `None` for plain functions, an empty receiver, or text that does
/// not have this shape.
fn extract_go_receiver_type_name(content: &str) -> Option<String> {
    let after_func = content.trim_start().strip_prefix("func")?.trim_start();
    let (receiver, _) = after_func.strip_prefix('(')?.split_once(')')?;

    // Cut the type-parameter list off first: gofmt writes `[K, V]` with a
    // space, so tokenising on whitespace before this step would pick `V]`.
    let receiver = receiver
        .split_once('[')
        .map_or(receiver, |(head, _)| head)
        .trim();

    // The type is the last token; an optional receiver name precedes it.
    let receiver_type = receiver.split_whitespace().last()?;
    let receiver_type = receiver_type.trim_start_matches('*');
    let receiver_type = receiver_type
        .rsplit_once('.')
        .map_or(receiver_type, |(_, name)| name)
        .trim();

    (!receiver_type.is_empty()).then(|| receiver_type.to_string())
}
379
380/// Restore original file path in entities when a custom extension mapping was used.
381fn fix_entity_paths(entities: &mut [SemanticEntity], original: &str, resolved: &str) {
382    for entity in entities {
383        entity.file_path = original.to_string();
384        entity.id = entity.id.replace(resolved, original);
385        if let Some(ref mut pid) = entity.parent_id {
386            *pid = pid.replace(resolved, original);
387        }
388    }
389}
390
/// List every dot-suffix of the file name in `file_path`, lower-cased and
/// ordered from longest to shortest.
///
/// For `"+page.svelte.ts"` this is `[".svelte.ts", ".ts"]`. A name without a
/// dot, or a path without a UTF-8 file name, yields an empty vector.
fn get_extensions(file_path: &str) -> Vec<String> {
    let lowered = match Path::new(file_path).file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_lowercase(),
        None => return Vec::new(),
    };

    lowered
        .match_indices('.')
        .map(|(dot, _)| lowered[dot..].to_string())
        .collect()
}
410
/// Language keyword → canonical file extension.
///
/// Used for shebang matching, vim modeline filetypes and language names in
/// custom extension mappings. Entry order is significant: lookups take the
/// first matching entry, so longer keywords must precede keywords they
/// contain (e.g. `javascript` before `java`).
const LANG_MAPPING: &[(&str, &str)] = &[
    // Scripting languages and shells
    ("perl", ".pl"),
    ("python", ".py"),
    ("ruby", ".rb"),
    ("bash", ".sh"),
    ("shell", ".sh"),
    ("/sh", ".sh"),
    ("node", ".js"),
    ("javascript", ".js"),
    ("typescript", ".ts"),
    ("tsx", ".tsx"),
    // Compiled / general-purpose languages
    ("swift", ".swift"),
    ("elixir", ".ex"),
    ("rust", ".rs"),
    ("go", ".go"),
    ("golang", ".go"),
    ("kotlin", ".kt"),
    ("dart", ".dart"),
    ("php", ".php"),
    ("java", ".java"),
    ("c", ".c"),
    ("cpp", ".cpp"),
    ("c++", ".cpp"),
    ("cs", ".cs"),
    ("csharp", ".cs"),
    ("c#", ".cs"),
    ("fortran", ".f90"),
    ("terraform", ".tf"),
    ("hcl", ".hcl"),
    ("ocaml", ".ml"),
    ("scala", ".scala"),
    ("haskell", ".hs"),
    ("zig", ".zig"),
    // Data and markup formats
    ("xml", ".xml"),
    ("json", ".json"),
    ("yaml", ".yaml"),
    ("yml", ".yaml"),
    ("toml", ".toml"),
    ("markdown", ".md"),
    ("csv", ".csv"),
    // Templates and component formats
    ("eruby", ".erb"),
    ("erb", ".erb"),
    ("vue", ".vue"),
    ("svelte", ".svelte"),
];
456
457/// Detect file extension from shebang line, vim modeline, or content heuristics.
458pub fn detect_ext_from_content(content: &str) -> Option<String> {
459    // Try shebang (first line)
460    if let Some(first_line) = content.lines().next() {
461        if first_line.starts_with("#!") {
462            let shebang = first_line.to_lowercase();
463            for (keyword, ext) in LANG_MAPPING {
464                if shebang.contains(keyword) {
465                    return Some(ext.to_string());
466                }
467            }
468        }
469    }
470
471    // Try vim modeline (first 5 or last 5 lines)
472    // Formats: `vim: ft=perl`, `vim: filetype=perl`, `vim: set ft=perl`
473    let lines: Vec<&str> = content.lines().collect();
474    let check_lines = lines.iter().take(5).chain(lines.iter().rev().take(5));
475    for line in check_lines {
476        if let Some(ft) = extract_vim_filetype(line) {
477            let ft_lower = ft.to_lowercase();
478            for (keyword, ext) in LANG_MAPPING {
479                if ft_lower == *keyword {
480                    return Some(ext.to_string());
481                }
482            }
483        }
484    }
485
486    // Try content heuristics (first-line markers and early declarations)
487    if let Some(ext) = detect_from_content_heuristics(content) {
488        return Some(ext);
489    }
490
491    None
492}
493
/// Content-based language detection from distinctive early lines.
///
/// The first line is checked for a PHP opening tag, an XML declaration or a
/// doctype. After that, each of the first 20 lines is run through a fixed
/// sequence of language markers; the first line that matches any marker
/// decides the result. Returns the extension with its leading dot, or `None`.
fn detect_from_content_heuristics(content: &str) -> Option<String> {
    /// True when `text` begins with any of `prefixes`.
    fn starts_with_any(text: &str, prefixes: &[&str]) -> bool {
        prefixes.iter().any(|prefix| text.starts_with(*prefix))
    }

    const PHP_TAGS: [&str; 2] = ["<?php", "<?PHP"];

    let opening = content.lines().next().map_or("", str::trim);
    // PHP: opening tag is unambiguous
    if starts_with_any(opening, &PHP_TAGS) {
        return Some(".php".to_string());
    }
    // XML/SVG/HTML: XML declaration or doctype
    if starts_with_any(opening, &["<?xml", "<!DOCTYPE", "<!doctype"]) {
        return Some(".xml".to_string());
    }

    // Whole-file probes, evaluated only when a line needs them.
    let has_cpp_marker = || {
        content.lines().take(30).any(|candidate| {
            starts_with_any(
                candidate.trim(),
                &[
                    "using namespace",
                    "class ",
                    "#include <iostream",
                    "#include <vector",
                    "#include <string>",
                    "#include <memory",
                ],
            )
        })
    };
    let has_implicit_none = || {
        content
            .lines()
            .take(20)
            .any(|candidate| candidate.trim().to_lowercase() == "implicit none")
    };

    // Classify one line; the order of the checks below is significant.
    let classify = |line: &str| -> Option<&'static str> {
        let text = line.trim();

        // PHP: opening tag anywhere in early lines
        if starts_with_any(text, &PHP_TAGS) || text == "<?=" {
            return Some(".php");
        }

        // C/C++: #include directive, C++ only if a C++ indicator is present
        if starts_with_any(text, &["#include ", "#include\t"]) {
            return Some(if has_cpp_marker() { ".cpp" } else { ".c" });
        }

        // `package` declarations: dotted with trailing `;` is Java, dotted
        // without it is Kotlin, undotted without any `;` is Go.
        if text.starts_with("package ") {
            if text.contains('.') {
                return Some(if text.ends_with(';') { ".java" } else { ".kt" });
            }
            if !text.contains(';') {
                return Some(".go");
            }
        }

        // Rust: common top-level declarations
        if starts_with_any(text, &["use std::", "use crate::"]) && text.ends_with(';') {
            return Some(".rs");
        }

        // Elixir: defmodule
        if text.starts_with("defmodule ") {
            return Some(".ex");
        }

        // C#: using System, or a namespace opening a brace (C++ files are
        // expected to have hit the #include check first)
        let csharp_using = text.starts_with("using System") && text.ends_with(';');
        let csharp_namespace = text.starts_with("namespace ") && text.ends_with('{');
        if csharp_using || csharp_namespace {
            return Some(".cs");
        }

        // Swift: import Foundation/UIKit/SwiftUI/Combine
        if matches!(
            text,
            "import Foundation" | "import UIKit" | "import SwiftUI" | "import Combine"
        ) {
            return Some(".swift");
        }

        // Dart: import 'dart:
        if starts_with_any(text, &["import 'dart:", "import \"dart:"]) {
            return Some(".dart");
        }

        // Scala: object/trait at top level
        if starts_with_any(text, &["object ", "trait "]) {
            return Some(".scala");
        }

        // Zig: const std = @import
        if text.contains("@import(") {
            return Some(".zig");
        }

        // HCL/Terraform: resource/variable/terraform/provider blocks
        if starts_with_any(
            text,
            &["resource \"", "variable \"", "terraform {", "provider \""],
        ) {
            return Some(".tf");
        }

        // Fortran (case-insensitive): `program` and `implicit none` decide on
        // their own; `module`/`subroutine` need an `implicit none` nearby.
        let lowered = text.to_lowercase();
        if lowered.starts_with("program ") || lowered == "implicit none" {
            return Some(".f90");
        }
        if starts_with_any(&lowered, &["module ", "subroutine "]) && has_implicit_none() {
            return Some(".f90");
        }

        // Python: def/class at indentation level 0 with colon
        if starts_with_any(text, &["def ", "class "])
            && text.ends_with(':')
            && line.starts_with(text)
        {
            return Some(".py");
        }

        // Ruby: require / require_relative
        if starts_with_any(text, &["require '", "require \"", "require_relative "]) {
            return Some(".rb");
        }

        // Perl: use strict/warnings, or variable declarations with sigils
        if matches!(text, "use strict;" | "use warnings;")
            || starts_with_any(text, &["my $", "my @", "my %"])
        {
            return Some(".pl");
        }

        None
    };

    content
        .lines()
        .take(20)
        .find_map(classify)
        .map(String::from)
}
651
/// Extract the filetype named by a vim modeline on `line`, if there is one.
///
/// Recognises `vim: ft=X`, `vim: filetype=X` and `vim: set ft=X ...:`.
/// Vim separates modeline options with whitespace or `:`, so
/// `vim: ft=python:ts=4` yields `"python"`. The value is returned exactly as
/// written (no case folding). Returns `None` when the line has no `vim:`
/// marker or no `ft=` / `filetype=` option after it.
fn extract_vim_filetype(line: &str) -> Option<&str> {
    let (_, options) = line.trim().split_once("vim:")?;

    options
        .split(|c: char| c.is_whitespace() || c == ':')
        .find_map(|option| {
            option
                .strip_prefix("ft=")
                .or_else(|| option.strip_prefix("filetype="))
        })
}
668
#[cfg(test)]
mod tests {
    use crate::parser::plugins::create_default_registry;
    use tempfile::TempDir;

    /// Create `name` (and any missing parent directories) under `dir` and
    /// fill it with `content`.
    fn write_file(dir: &TempDir, name: &str, content: &str) {
        let target = dir.path().join(name);
        if let Some(parent_dir) = target.parent() {
            std::fs::create_dir_all(parent_dir).unwrap();
        }
        std::fs::write(target, content).unwrap();
    }

    /// Id of the plugin the default registry selects for `path` from the file
    /// name alone.
    fn plugin_id_for(path: &str) -> String {
        create_default_registry()
            .get_plugin(path)
            .expect("plugin should exist")
            .id()
            .to_string()
    }

    // --- Extension-based lookup -------------------------------------------

    #[test]
    fn test_registry_matches_compound_svelte_typescript_suffix() {
        assert_eq!(plugin_id_for("src/routes/+page.svelte.ts"), "svelte");
    }

    #[test]
    fn test_registry_matches_compound_svelte_javascript_suffix() {
        assert_eq!(plugin_id_for("src/routes/+layout.svelte.js"), "svelte");
    }

    #[test]
    fn test_registry_matches_svelte_test_suffix() {
        assert_eq!(plugin_id_for("src/lib/multiplier.svelte.test.js"), "svelte");
    }

    #[test]
    fn test_registry_prefers_svelte_plugin_for_component_files() {
        assert_eq!(plugin_id_for("src/lib/Component.svelte"), "svelte");
    }

    #[test]
    fn test_registry_matches_typescript_module_suffix() {
        assert_eq!(plugin_id_for("src/lib/index.mts"), "code");
    }

    #[test]
    fn test_registry_matches_typescript_commonjs_suffix() {
        assert_eq!(plugin_id_for("src/lib/index.cts"), "code");
    }

    // --- Content-based detection ------------------------------------------

    #[test]
    fn test_detect_php_from_opening_tag() {
        let registry = create_default_registry();
        let source = "<?php\nclass Vendor {\n    function get_name() { return $this->name; }\n}\n";
        let plugin = registry
            .get_plugin_with_content("vendor.inc2", source)
            .expect("should detect PHP");
        let found = plugin.extract_entities(source, "vendor.inc2");
        assert!(found.iter().any(|e| e.entity_type == "class"));
    }

    #[test]
    fn test_detect_c_from_include() {
        let registry = create_default_registry();
        let source = "#include <stdio.h>\n\nint main() {\n    printf(\"hello\");\n    return 0;\n}\n";
        let plugin = registry
            .get_plugin_with_content("main.xyz", source)
            .expect("should detect C");
        let found = plugin.extract_entities(source, "main.xyz");
        assert!(found.iter().any(|e| e.name == "main"));
    }

    #[test]
    fn test_detect_java_from_package() {
        let registry = create_default_registry();
        let source = "package com.example.app;\n\npublic class Main {\n    public static void main(String[] args) {}\n}\n";
        let plugin = registry
            .get_plugin_with_content("Main", source)
            .expect("should detect Java");
        let found = plugin.extract_entities(source, "Main");
        assert!(found.iter().any(|e| e.name == "Main"));
    }

    #[test]
    fn test_detect_go_from_package() {
        let registry = create_default_registry();
        let source = "package main\n\nimport \"fmt\"\n\nfunc hello() {\n    fmt.Println(\"hi\")\n}\n";
        let plugin = registry
            .get_plugin_with_content("main", source)
            .expect("should detect Go");
        let found = plugin.extract_entities(source, "main");
        assert!(found.iter().any(|e| e.name == "hello"));
    }

    #[test]
    fn test_detect_rust_from_use_std() {
        let registry = create_default_registry();
        let source = "use std::collections::HashMap;\n\nfn process() {\n    let m = HashMap::new();\n}\n";
        let plugin = registry
            .get_plugin_with_content("lib", source)
            .expect("should detect Rust");
        let found = plugin.extract_entities(source, "lib");
        assert!(found.iter().any(|e| e.name == "process"));
    }

    // --- Go method parent resolution ---------------------------------------

    #[cfg(feature = "lang-go")]
    #[test]
    fn test_go_method_parent_resolves_across_files() {
        let registry = create_default_registry();
        let workspace = TempDir::new().unwrap();
        write_file(&workspace, "models.go", "package demo\n\ntype Service struct{}\n");
        write_file(
            &workspace,
            "methods.go",
            "package demo\n\nfunc (s *Service) Run() {}\n",
        );

        let files = ["models.go".to_string(), "methods.go".to_string()];
        let entities = registry.extract_all_entities(workspace.path(), &files);

        let service = entities
            .iter()
            .find(|e| e.name == "Service" && e.file_path == "models.go")
            .expect("Service type should be extracted");
        let run = entities
            .iter()
            .find(|e| e.name == "Run" && e.file_path == "methods.go")
            .expect("Run method should be extracted");

        assert_eq!(run.parent_id.as_deref(), Some(service.id.as_str()));
        assert_eq!(run.id, format!("{}::Run", service.id));
    }

    #[cfg(feature = "lang-go")]
    #[test]
    fn test_go_method_parent_resolution_is_package_directory_scoped() {
        let registry = create_default_registry();
        let workspace = TempDir::new().unwrap();
        // Two directories holding identically named packages and types.
        for package_dir in ["alpha", "beta"] {
            write_file(
                &workspace,
                &format!("{}/models.go", package_dir),
                "package demo\n\ntype Service struct{}\n",
            );
            write_file(
                &workspace,
                &format!("{}/methods.go", package_dir),
                "package demo\n\nfunc (s *Service) Run() {}\n",
            );
        }

        let files = [
            "alpha/models.go".to_string(),
            "alpha/methods.go".to_string(),
            "beta/models.go".to_string(),
            "beta/methods.go".to_string(),
        ];
        let entities = registry.extract_all_entities(workspace.path(), &files);

        let alpha_service = entities
            .iter()
            .find(|e| e.name == "Service" && e.file_path == "alpha/models.go")
            .expect("alpha Service type should be extracted");
        let beta_service = entities
            .iter()
            .find(|e| e.name == "Service" && e.file_path == "beta/models.go")
            .expect("beta Service type should be extracted");
        let alpha_run = entities
            .iter()
            .find(|e| e.name == "Run" && e.file_path == "alpha/methods.go")
            .expect("alpha Run method should be extracted");
        let beta_run = entities
            .iter()
            .find(|e| e.name == "Run" && e.file_path == "beta/methods.go")
            .expect("beta Run method should be extracted");

        assert_eq!(alpha_run.parent_id.as_deref(), Some(alpha_service.id.as_str()));
        assert_eq!(beta_run.parent_id.as_deref(), Some(beta_service.id.as_str()));
    }

    // --- Priority and custom mappings --------------------------------------

    #[test]
    fn test_extension_takes_priority_over_heuristics() {
        let registry = create_default_registry();
        // Content looks like PHP but file has .py extension
        let source = "<?php\nclass Foo {}\n";
        let plugin = registry
            .get_plugin_with_content("script.py", source)
            .expect("should use Python parser");
        assert_eq!(plugin.id(), "code"); // Python uses code plugin, not PHP
    }

    #[test]
    fn test_custom_extension_mapping_extracts_entities() {
        let mut registry = create_default_registry();
        registry.add_extension_mapping(".mypy", "python");

        let source = "def hello():\n    print(\"hello world\")\n\nclass Calculator:\n    def multiply(self, a, b):\n        return a * b\n";
        let entities = registry.extract_entities("utils.mypy", source);

        assert!(!entities.is_empty(), "Should extract entities via custom mapping");
        assert!(entities.iter().any(|e| e.name == "hello"), "Should find hello function");
        assert!(entities.iter().any(|e| e.name == "Calculator"), "Should find Calculator class");
        assert!(entities.iter().any(|e| e.name == "multiply"), "Should find multiply method");

        // File path should preserve the original extension
        for entity in &entities {
            assert_eq!(entity.file_path, "utils.mypy", "Entity file_path should use original extension");
            assert!(entity.id.starts_with("utils.mypy::"), "Entity ID should use original file path");
        }
    }
}
905}