Skip to main content

sem_core/parser/
registry.rs

1use std::collections::HashMap;
2use std::path::Path;
3#[cfg(feature = "parallel")]
4use rayon::prelude::*;
5
6use crate::model::entity::{build_entity_id, SemanticEntity};
7
// Iterate over `$slice` in parallel when the crate is built with the
// `parallel` feature (via `par_iter()`, brought into scope by the
// feature-gated `rayon::prelude` import), and sequentially with a plain
// `iter()` otherwise. Exactly one of the two blocks survives `cfg` expansion,
// so the macro always evaluates to a single iterator expression.
macro_rules! maybe_par_iter {
    ($slice:expr) => {{
        #[cfg(feature = "parallel")]
        { $slice.par_iter() }
        #[cfg(not(feature = "parallel"))]
        { $slice.iter() }
    }};
}
16use super::plugin::SemanticParserPlugin;
17
/// Registry of parser plugins, looked up by (lower-cased) file extension.
///
/// Besides the extensions each plugin reports at registration time, the
/// registry also tracks user-defined extension aliases loaded from `.semrc`
/// and `.gitattributes`.
pub struct ParserRegistry {
    /// Registered plugins, in registration order.
    plugins: Vec<Box<dyn SemanticParserPlugin>>,
    extension_map: HashMap<String, usize>, // ext → index into plugins
    custom_ext_canonical: HashMap<String, String>, // ".mypy" → ".py" (custom → canonical)
    /// Directory names read from the `test-dirs` key of `.semrc`.
    pub custom_test_dirs: Vec<String>,
}
24
25impl ParserRegistry {
26    pub fn new() -> Self {
27        Self {
28            plugins: Vec::new(),
29            extension_map: HashMap::new(),
30            custom_ext_canonical: HashMap::new(),
31            custom_test_dirs: Vec::new(),
32        }
33    }
34
35    pub fn register(&mut self, plugin: Box<dyn SemanticParserPlugin>) {
36        let idx = self.plugins.len();
37        for ext in plugin.extensions() {
38            self.extension_map.insert(ext.to_string(), idx);
39        }
40        self.plugins.push(plugin);
41    }
42
43    pub fn get_plugin(&self, file_path: &str) -> Option<&dyn SemanticParserPlugin> {
44        for ext in get_extensions(file_path) {
45            if let Some(&idx) = self.extension_map.get(&ext) {
46                return Some(self.plugins[idx].as_ref());
47            }
48        }
49        // Fallback plugin
50        self.get_plugin_by_id("fallback")
51    }
52
53    pub fn get_explicit_plugin(&self, file_path: &str) -> Option<&dyn SemanticParserPlugin> {
54        for ext in get_extensions(file_path) {
55            if let Some(&idx) = self.extension_map.get(&ext) {
56                return Some(self.plugins[idx].as_ref());
57            }
58        }
59        None
60    }
61
    /// Detect a plugin from file content alone, without looking at the file name.
    ///
    /// Public wrapper around `detect_from_shebang`. Returns `None` when no
    /// extension can be detected from `content`, or when the detected
    /// extension has no registered plugin.
    pub fn detect_plugin_from_content(&self, content: &str) -> Option<&dyn SemanticParserPlugin> {
        self.detect_from_shebang(content)
    }
65
66    /// Try to detect language from shebang line when extension-based lookup fails.
67    /// Call this as a fallback when file content is available.
68    pub fn get_plugin_with_content(&self, file_path: &str, content: &str) -> Option<&dyn SemanticParserPlugin> {
69        // Try extension first
70        for ext in get_extensions(file_path) {
71            if let Some(&idx) = self.extension_map.get(&ext) {
72                return Some(self.plugins[idx].as_ref());
73            }
74        }
75        // Try shebang detection
76        if let Some(plugin) = self.detect_from_shebang(content) {
77            return Some(plugin);
78        }
79        // Fallback plugin
80        self.get_plugin_by_id("fallback")
81    }
82
83    fn detect_from_shebang(&self, content: &str) -> Option<&dyn SemanticParserPlugin> {
84        if let Some(ext) = detect_ext_from_content(content) {
85            if let Some(&idx) = self.extension_map.get(ext.as_str()) {
86                return Some(self.plugins[idx].as_ref());
87            }
88        }
89        None
90    }
91
92    pub fn get_plugin_by_id(&self, id: &str) -> Option<&dyn SemanticParserPlugin> {
93        self.plugins
94            .iter()
95            .find(|p| p.id() == id)
96            .map(|p| p.as_ref())
97    }
98
99    /// Register a custom extension mapping from a .semrc file.
100    /// Maps an extension (e.g. ".inc") to an existing plugin by language name.
101    pub fn add_extension_mapping(&mut self, ext: &str, language: &str) -> bool {
102        let ext = if ext.starts_with('.') {
103            ext.to_lowercase()
104        } else {
105            format!(".{}", ext.to_lowercase())
106        };
107
108        // Find plugin index by matching language name against known extensions
109        let target_ext = LANG_MAPPING
110            .iter()
111            .find(|(kw, _)| *kw == language.to_lowercase())
112            .map(|(_, e)| *e);
113
114        if let Some(target) = target_ext {
115            if let Some(&idx) = self.extension_map.get(target) {
116                self.custom_ext_canonical.insert(ext.clone(), target.to_string());
117                self.extension_map.insert(ext, idx);
118                return true;
119            }
120        }
121
122        // Also try matching directly against registered extensions
123        let direct_ext = format!(".{}", language.to_lowercase());
124        if let Some(&idx) = self.extension_map.get(&direct_ext) {
125            self.custom_ext_canonical.insert(ext.clone(), direct_ext);
126            self.extension_map.insert(ext, idx);
127            return true;
128        }
129
130        false
131    }
132
133    /// Load extension mappings from a .semrc file at the given root directory.
134    /// File format (one mapping per line): `.ext = language`
135    /// Example:
136    ///   .inc = php
137    ///   .j = json
138    ///   .xyz = cpp
139    pub fn load_semrc(&mut self, root: &Path) {
140        let semrc_path = root.join(".semrc");
141        if !semrc_path.exists() {
142            return;
143        }
144        let content = match std::fs::read_to_string(&semrc_path) {
145            Ok(c) => c,
146            Err(_) => return,
147        };
148        for line in content.lines() {
149            let line = line.trim();
150            if line.is_empty() || line.starts_with('#') {
151                continue;
152            }
153            if let Some((key, value)) = line.split_once('=') {
154                let key = key.trim();
155                let value = value.trim();
156                if key == "test-dirs" {
157                    self.custom_test_dirs = value
158                        .split(',')
159                        .map(|s| s.trim().to_string())
160                        .filter(|s| !s.is_empty())
161                        .collect();
162                } else {
163                    self.add_extension_mapping(key, value);
164                }
165            }
166        }
167    }
168
169    /// Load extension mappings from `.gitattributes` at the given root directory.
170    /// Parses `*.ext diff=language` and `*.ext linguist-language=Language` patterns.
171    /// Only processes `*.ext` glob patterns (not path-based patterns).
172    pub fn load_gitattributes(&mut self, root: &Path) {
173        let ga_path = root.join(".gitattributes");
174        if !ga_path.exists() {
175            return;
176        }
177        let content = match std::fs::read_to_string(&ga_path) {
178            Ok(c) => c,
179            Err(_) => return,
180        };
181        for line in content.lines() {
182            let line = line.trim();
183            if line.is_empty() || line.starts_with('#') {
184                continue;
185            }
186            let mut parts = line.split_whitespace();
187            let pattern = match parts.next() {
188                Some(p) => p,
189                None => continue,
190            };
191            // Only handle *.ext patterns
192            let ext = match pattern.strip_prefix("*.") {
193                Some(e) => e,
194                None => continue,
195            };
196            // Already mapped (e.g. by .semrc which takes priority)
197            let ext_key = format!(".{}", ext.to_lowercase());
198            if self.custom_ext_canonical.contains_key(&ext_key) {
199                continue;
200            }
201            // Look for diff= or linguist-language= attributes
202            for attr in parts {
203                if let Some(lang) = attr.strip_prefix("diff=") {
204                    self.add_extension_mapping(ext, lang);
205                    break;
206                }
207                if let Some(lang) = attr.strip_prefix("linguist-language=") {
208                    self.add_extension_mapping(ext, lang);
209                    break;
210                }
211            }
212        }
213    }
214
215    /// Resolve custom extension mappings in a file path.
216    /// E.g. if `.mypy` is mapped to `python` (canonical `.py`),
217    /// `"utils.mypy"` becomes `"utils.py"`.
218    pub fn resolve_file_path(&self, file_path: &str) -> Option<String> {
219        let path = Path::new(file_path);
220        let ext = path
221            .extension()
222            .and_then(|e| e.to_str())
223            .map(|e| format!(".{}", e.to_lowercase()))?;
224
225        let canonical = self.custom_ext_canonical.get(&ext)?;
226        let stem = path.file_stem().and_then(|s| s.to_str())?;
227
228        if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
229            Some(format!("{}/{}{}", parent.display(), stem, canonical))
230        } else {
231            Some(format!("{}{}", stem, canonical))
232        }
233    }
234
235    /// Extract entities, transparently handling custom extension mappings.
236    /// Uses the resolved path for language detection but restores the original
237    /// file path in entity metadata (file_path, id, parent_id).
238    pub fn extract_entities(&self, file_path: &str, content: &str) -> Vec<SemanticEntity> {
239        let resolved = self.resolve_file_path(file_path);
240        let detection_path = resolved.as_deref().unwrap_or(file_path);
241
242        let plugin = match self.get_plugin_with_content(detection_path, content) {
243            Some(p) => p,
244            None => return Vec::new(),
245        };
246
247        let mut entities = plugin.extract_entities(content, detection_path);
248        if let Some(ref rp) = resolved {
249            fix_entity_paths(&mut entities, file_path, rp);
250        }
251        entities
252    }
253
254    /// Extract entities with tree, transparently handling custom extension mappings.
255    pub fn extract_entities_with_tree(
256        &self,
257        file_path: &str,
258        content: &str,
259    ) -> Option<(Vec<SemanticEntity>, Option<tree_sitter::Tree>)> {
260        let resolved = self.resolve_file_path(file_path);
261        let detection_path = resolved.as_deref().unwrap_or(file_path);
262
263        let plugin = self.get_plugin_with_content(detection_path, content)?;
264        let (mut entities, tree) = plugin.extract_entities_with_tree(content, detection_path);
265        if let Some(ref rp) = resolved {
266            fix_entity_paths(&mut entities, file_path, rp);
267        }
268        Some((entities, tree))
269    }
270
271    /// Extract entities from multiple files in parallel.
272    pub fn extract_all_entities(
273        &self,
274        root: &Path,
275        file_paths: &[String],
276    ) -> Vec<SemanticEntity> {
277        let mut entities: Vec<SemanticEntity> = maybe_par_iter!(file_paths)
278            .flat_map(|fp| {
279                let full = root.join(fp);
280                let content = match std::fs::read_to_string(&full) {
281                    Ok(c) => c,
282                    Err(_) => return Vec::new(),
283                };
284                self.extract_entities(fp, &content)
285            })
286            .collect();
287        resolve_go_method_parent_ids(&mut entities);
288        entities
289    }
290}
291
292pub fn resolve_go_method_parent_ids(entities: &mut [SemanticEntity]) {
293    let mut types_by_package: HashMap<(String, String, String), String> = HashMap::new();
294
295    for entity in entities.iter() {
296        if !is_go_file(&entity.file_path) || !is_go_receiver_type_entity(entity) {
297            continue;
298        }
299
300        let package_name = go_package_name(entity).unwrap_or("");
301
302        types_by_package
303            .entry((
304                go_package_dir(&entity.file_path).to_string(),
305                package_name.to_string(),
306                entity.name.clone(),
307            ))
308            .or_insert_with(|| entity.id.clone());
309    }
310
311    for entity in entities.iter_mut() {
312        if !is_go_file(&entity.file_path) || entity.entity_type != "method" {
313            continue;
314        }
315
316        let package_name = go_package_name(entity).unwrap_or("");
317        let Some(receiver_name) = extract_go_receiver_type_name(&entity.content) else {
318            continue;
319        };
320
321        let key = (
322            go_package_dir(&entity.file_path).to_string(),
323            package_name.to_string(),
324            receiver_name,
325        );
326
327        let Some(parent_id) = types_by_package.get(&key) else {
328            continue;
329        };
330
331        if entity.parent_id.as_deref() == Some(parent_id.as_str()) {
332            continue;
333        }
334
335        entity.parent_id = Some(parent_id.clone());
336        entity.id = build_entity_id(
337            &entity.file_path,
338            &entity.entity_type,
339            &entity.name,
340            Some(parent_id),
341        );
342    }
343}
344
/// Whether `file_path` names a Go source file.
///
/// The `.go` suffix is compared ASCII case-insensitively, matching the
/// registry's extension lookup, which lower-cases file names before matching.
fn is_go_file(file_path: &str) -> bool {
    const GO_EXT: &[u8] = b".go";
    let bytes = file_path.as_bytes();
    // Compare raw bytes so a multi-byte character before the suffix cannot
    // cause a slice on a non-character boundary.
    bytes.len() >= GO_EXT.len()
        && bytes[bytes.len() - GO_EXT.len()..].eq_ignore_ascii_case(GO_EXT)
}
348
349fn is_go_receiver_type_entity(entity: &SemanticEntity) -> bool {
350    matches!(
351        entity.entity_type.as_str(),
352        "type" | "struct" | "class" | "interface"
353    )
354}
355
356fn go_package_name(entity: &SemanticEntity) -> Option<&str> {
357    entity
358        .metadata
359        .as_ref()
360        .and_then(|metadata| metadata.get("go.package"))
361        .map(String::as_str)
362}
363
/// Directory part of a `/`-separated path: everything before the last `/`,
/// or the empty string when the path contains no `/`.
fn go_package_dir(file_path: &str) -> &str {
    match file_path.rfind('/') {
        Some(last_slash) => &file_path[..last_slash],
        None => "",
    }
}
367
/// Extract the bare receiver type name from the source of a Go method.
///
/// `content` is expected to start (after optional whitespace) with a method
/// declaration such as `func (s *Service) Run() {}`. The receiver's variable
/// name, pointer marker, type parameters and any package qualifier are
/// stripped, so `(s *Service)`, `(Service)` and `(c *Cache[K, V])` yield
/// `Service`, `Service` and `Cache`.
///
/// Returns `None` when `content` does not start with `func (`, the receiver
/// list is unterminated or empty, or no type name remains.
fn extract_go_receiver_type_name(content: &str) -> Option<String> {
    let after_func = content.trim_start().strip_prefix("func")?.trim_start();
    let (receiver, _) = after_func.strip_prefix('(')?.split_once(')')?;

    // Drop type parameters before tokenizing: `[K, V]` contains whitespace,
    // so tokenizing first would pick `V]` as the "type".
    let receiver = receiver
        .split_once('[')
        .map_or(receiver, |(head, _)| head);

    // The type is the last token; an unnamed receiver has only that token.
    let receiver_type = receiver
        .split_whitespace()
        .last()?
        .trim_start_matches('*');
    let receiver_type = receiver_type
        .rsplit_once('.')
        .map_or(receiver_type, |(_, name)| name)
        .trim();

    (!receiver_type.is_empty()).then(|| receiver_type.to_string())
}
391
392/// Restore original file path in entities when a custom extension mapping was used.
393fn fix_entity_paths(entities: &mut [SemanticEntity], original: &str, resolved: &str) {
394    for entity in entities {
395        entity.file_path = original.to_string();
396        entity.id = entity.id.replace(resolved, original);
397        if let Some(ref mut pid) = entity.parent_id {
398            *pid = pid.replace(resolved, original);
399        }
400    }
401}
402
/// List every dot-suffix of the file name in `file_path`, lower-cased and
/// ordered longest first: `"+page.svelte.ts"` → `[".svelte.ts", ".ts"]`.
///
/// Returns an empty vector when the path has no file name, the name is not
/// valid UTF-8, or it contains no dot.
fn get_extensions(file_path: &str) -> Vec<String> {
    let lowered = match Path::new(file_path).file_name().and_then(|n| n.to_str()) {
        Some(name) => name.to_lowercase(),
        None => return Vec::new(),
    };

    // Each '.' starts one candidate suffix that runs to the end of the name.
    lowered
        .match_indices('.')
        .map(|(dot, _)| lowered[dot..].to_string())
        .collect()
}
422
/// Table of `(keyword, canonical extension)` pairs.
///
/// Used by `add_extension_mapping` to turn a language name into the
/// extension of an already-registered plugin, and by
/// `detect_ext_from_content` to interpret shebang lines and vim modelines.
/// Entries are scanned in order and the first match wins, so when two
/// keywords could both match, the earlier one takes precedence.
const LANG_MAPPING: &[(&str, &str)] = &[
    ("perl", ".pl"),
    ("python", ".py"),
    ("ruby", ".rb"),
    ("bash", ".sh"),
    ("shell", ".sh"),
    ("/sh", ".sh"),
    ("node", ".js"),
    ("javascript", ".js"),
    ("typescript", ".ts"),
    ("tsx", ".tsx"),
    ("swift", ".swift"),
    ("elixir", ".ex"),
    ("rust", ".rs"),
    ("go", ".go"),
    ("golang", ".go"),
    ("kotlin", ".kt"),
    ("dart", ".dart"),
    ("php", ".php"),
    ("java", ".java"),
    ("c", ".c"),
    ("cpp", ".cpp"),
    ("c++", ".cpp"),
    ("cs", ".cs"),
    ("csharp", ".cs"),
    ("c#", ".cs"),
    ("fortran", ".f90"),
    ("terraform", ".tf"),
    ("hcl", ".hcl"),
    ("ocaml", ".ml"),
    ("scala", ".scala"),
    ("haskell", ".hs"),
    ("elm", ".elm"),
    ("d", ".d"),
    ("dlang", ".d"),
    ("zig", ".zig"),
    ("xml", ".xml"),
    ("json", ".json"),
    ("yaml", ".yaml"),
    ("yml", ".yaml"),
    ("toml", ".toml"),
    ("markdown", ".md"),
    ("csv", ".csv"),
    ("eruby", ".erb"),
    ("erb", ".erb"),
    ("vue", ".vue"),
    ("svelte", ".svelte"),
];
471
472/// Detect file extension from shebang line, vim modeline, or content heuristics.
473pub fn detect_ext_from_content(content: &str) -> Option<String> {
474    // Try shebang (first line)
475    if let Some(first_line) = content.lines().next() {
476        if first_line.starts_with("#!") {
477            let shebang = first_line.to_lowercase();
478            for (keyword, ext) in LANG_MAPPING {
479                if shebang.contains(keyword) {
480                    return Some(ext.to_string());
481                }
482            }
483        }
484    }
485
486    // Try vim modeline (first 5 or last 5 lines)
487    // Formats: `vim: ft=perl`, `vim: filetype=perl`, `vim: set ft=perl`
488    let lines: Vec<&str> = content.lines().collect();
489    let check_lines = lines.iter().take(5).chain(lines.iter().rev().take(5));
490    for line in check_lines {
491        if let Some(ft) = extract_vim_filetype(line) {
492            let ft_lower = ft.to_lowercase();
493            for (keyword, ext) in LANG_MAPPING {
494                if ft_lower == *keyword {
495                    return Some(ext.to_string());
496                }
497            }
498        }
499    }
500
501    // Try content heuristics (first-line markers and early declarations)
502    if let Some(ext) = detect_from_content_heuristics(content) {
503        return Some(ext);
504    }
505
506    None
507}
508
/// High-confidence content-based language detection.
/// Only uses markers with near-zero false-positive rates.
///
/// Checks the first line for PHP/XML/doctype markers, then scans the first
/// 20 lines for language-specific declarations. Checks run in a fixed order
/// per line and the first match wins. A leading UTF-8 byte-order mark is
/// ignored. Returns the canonical extension (with dot) or `None`.
fn detect_from_content_heuristics(content: &str) -> Option<String> {
    // A BOM is not whitespace, so `trim()` would leave it glued to the first
    // token and defeat every `starts_with` check on the first line.
    let content = content.strip_prefix('\u{feff}').unwrap_or(content);
    let first_line = content.lines().next().unwrap_or("").trim();

    // PHP: opening tag is unambiguous
    if first_line.starts_with("<?php") || first_line.starts_with("<?PHP") {
        return Some(".php".to_string());
    }

    // XML/SVG/HTML: XML declaration or doctype
    if first_line.starts_with("<?xml")
        || first_line.starts_with("<!DOCTYPE")
        || first_line.starts_with("<!doctype")
    {
        return Some(".xml".to_string());
    }

    // Scan first ~20 lines for language-specific patterns
    for line in content.lines().take(20) {
        let trimmed = line.trim();

        // PHP: opening tag anywhere in early lines
        if trimmed.starts_with("<?php") || trimmed.starts_with("<?PHP") || trimmed == "<?=" {
            return Some(".php".to_string());
        }

        // C/C++: #include directive
        if trimmed.starts_with("#include ") || trimmed.starts_with("#include\t") {
            // Could be C or C++. Check for C++ indicators.
            let looks_like_cpp = content.lines().take(30).any(|l| {
                let t = l.trim();
                t.starts_with("using namespace")
                    || t.starts_with("class ")
                    || t.starts_with("#include <iostream")
                    || t.starts_with("#include <vector")
                    || t.starts_with("#include <string>")
                    || t.starts_with("#include <memory")
            });
            let ext = if looks_like_cpp { ".cpp" } else { ".c" };
            return Some(ext.to_string());
        }

        // Java: package declaration with dots
        if trimmed.starts_with("package ") && trimmed.contains('.') && trimmed.ends_with(';') {
            return Some(".java".to_string());
        }

        // Go: package declaration without dots or semicolons
        if trimmed.starts_with("package ") && !trimmed.contains('.') && !trimmed.contains(';') {
            return Some(".go".to_string());
        }

        // Rust: common top-level declarations
        if (trimmed.starts_with("use std::") || trimmed.starts_with("use crate::"))
            && trimmed.ends_with(';')
        {
            return Some(".rs".to_string());
        }

        // Elixir: defmodule
        if trimmed.starts_with("defmodule ") {
            return Some(".ex".to_string());
        }

        // Kotlin: package with dots but no semicolon (Kotlin doesn't require semicolons)
        if trimmed.starts_with("package ") && trimmed.contains('.') && !trimmed.ends_with(';') {
            return Some(".kt".to_string());
        }

        // C#: using System or namespace with braces
        if trimmed.starts_with("using System") && trimmed.ends_with(';') {
            return Some(".cs".to_string());
        }
        if trimmed.starts_with("namespace ") && trimmed.ends_with('{') {
            // Could be C++ too, but C++ usually has #include before namespace
            // If we got here without matching #include, it's likely C#
            return Some(".cs".to_string());
        }

        // Swift: import Foundation/UIKit/SwiftUI
        if matches!(
            trimmed,
            "import Foundation" | "import UIKit" | "import SwiftUI" | "import Combine"
        ) {
            return Some(".swift".to_string());
        }

        // Dart: import 'dart:
        if trimmed.starts_with("import 'dart:") || trimmed.starts_with("import \"dart:") {
            return Some(".dart".to_string());
        }

        // Scala: object/trait at top level
        if trimmed.starts_with("object ") || trimmed.starts_with("trait ") {
            return Some(".scala".to_string());
        }

        // Zig: const std = @import
        if trimmed.contains("@import(") {
            return Some(".zig".to_string());
        }

        // HCL/Terraform: resource/variable/terraform blocks
        if trimmed.starts_with("resource \"")
            || trimmed.starts_with("variable \"")
            || trimmed.starts_with("terraform {")
            || trimmed.starts_with("provider \"")
        {
            return Some(".tf".to_string());
        }

        // Fortran: program/module/subroutine (case-insensitive)
        let lower = trimmed.to_lowercase();
        if lower.starts_with("program ") || lower == "implicit none" {
            return Some(".f90".to_string());
        }
        if lower.starts_with("module ") || lower.starts_with("subroutine ") {
            // "module " could be Ruby, so require a Fortran-specific
            // `implicit none` somewhere in the early lines.
            let has_implicit_none = content
                .lines()
                .take(20)
                .any(|l| l.trim().to_lowercase() == "implicit none");
            if has_implicit_none {
                return Some(".f90".to_string());
            }
        }

        // Python: def/class at indentation level 0 with colon
        if (trimmed.starts_with("def ") || trimmed.starts_with("class "))
            && trimmed.ends_with(':')
            && !line.starts_with(char::is_whitespace)
        {
            return Some(".py".to_string());
        }

        // Ruby: require or module/class without colon (Python uses colon)
        if trimmed.starts_with("require '")
            || trimmed.starts_with("require \"")
            || trimmed.starts_with("require_relative ")
        {
            return Some(".rb".to_string());
        }

        // Perl: use strict/warnings, or variable declarations with sigils
        if trimmed == "use strict;"
            || trimmed == "use warnings;"
            || trimmed.starts_with("my $")
            || trimmed.starts_with("my @")
            || trimmed.starts_with("my %")
        {
            return Some(".pl".to_string());
        }
    }

    None
}
666
/// Extract the filetype named by a vim modeline on `line`, if any.
///
/// Recognizes `ft=X` and `filetype=X` after the first `vim:` marker, with
/// options separated by whitespace and/or colons. This covers
/// `vim: ft=X`, `vim: set ft=X:` and the compact `vim:ft=X:ts=4` form.
/// Options with an empty value are skipped. Returns `None` when the line has
/// no `vim:` marker or no filetype option.
fn extract_vim_filetype(line: &str) -> Option<&str> {
    let (_, options) = line.split_once("vim:")?;

    options
        .split(|c: char| c.is_whitespace() || c == ':')
        .find_map(|option| {
            option
                .strip_prefix("ft=")
                .or_else(|| option.strip_prefix("filetype="))
                .filter(|value| !value.is_empty())
        })
}
683
684#[cfg(test)]
685mod tests {
686    use crate::parser::plugins::create_default_registry;
687    use tempfile::TempDir;
688
689    fn write_file(dir: &TempDir, name: &str, content: &str) {
690        let path = dir.path().join(name);
691        if let Some(parent) = path.parent() {
692            std::fs::create_dir_all(parent).unwrap();
693        }
694        std::fs::write(path, content).unwrap();
695    }
696
697    #[test]
698    fn test_registry_matches_compound_svelte_typescript_suffix() {
699        let registry = create_default_registry();
700        let plugin = registry
701            .get_plugin("src/routes/+page.svelte.ts")
702            .expect("plugin should exist");
703
704        assert_eq!(plugin.id(), "svelte");
705    }
706
707    #[test]
708    fn test_registry_matches_compound_svelte_javascript_suffix() {
709        let registry = create_default_registry();
710        let plugin = registry
711            .get_plugin("src/routes/+layout.svelte.js")
712            .expect("plugin should exist");
713
714        assert_eq!(plugin.id(), "svelte");
715    }
716
717    #[test]
718    fn test_registry_matches_svelte_test_suffix() {
719        let registry = create_default_registry();
720        let plugin = registry
721            .get_plugin("src/lib/multiplier.svelte.test.js")
722            .expect("plugin should exist");
723
724        assert_eq!(plugin.id(), "svelte");
725    }
726
727    #[test]
728    fn test_registry_prefers_svelte_plugin_for_component_files() {
729        let registry = create_default_registry();
730        let plugin = registry
731            .get_plugin("src/lib/Component.svelte")
732            .expect("plugin should exist");
733
734        assert_eq!(plugin.id(), "svelte");
735    }
736
737    #[test]
738    fn test_registry_matches_typescript_module_suffix() {
739        let registry = create_default_registry();
740        let plugin = registry
741            .get_plugin("src/lib/index.mts")
742            .expect("plugin should exist");
743
744        assert_eq!(plugin.id(), "code");
745    }
746
747    #[test]
748    fn test_registry_matches_typescript_commonjs_suffix() {
749        let registry = create_default_registry();
750        let plugin = registry
751            .get_plugin("src/lib/index.cts")
752            .expect("plugin should exist");
753
754        assert_eq!(plugin.id(), "code");
755    }
756
757    #[test]
758    fn test_detect_php_from_opening_tag() {
759        let registry = create_default_registry();
760        let content = "<?php\nclass Vendor {\n    function get_name() { return $this->name; }\n}\n";
761        let plugin = registry
762            .get_plugin_with_content("vendor.inc2", content)
763            .expect("should detect PHP");
764        let entities = plugin.extract_entities(content, "vendor.inc2");
765        assert!(entities.iter().any(|e| e.entity_type == "class"));
766    }
767
768    #[test]
769    fn test_detect_c_from_include() {
770        let registry = create_default_registry();
771        let content = "#include <stdio.h>\n\nint main() {\n    printf(\"hello\");\n    return 0;\n}\n";
772        let plugin = registry
773            .get_plugin_with_content("main.xyz", content)
774            .expect("should detect C");
775        let entities = plugin.extract_entities(content, "main.xyz");
776        assert!(entities.iter().any(|e| e.name == "main"));
777    }
778
779    #[test]
780    fn test_detect_java_from_package() {
781        let registry = create_default_registry();
782        let content = "package com.example.app;\n\npublic class Main {\n    public static void main(String[] args) {}\n}\n";
783        let plugin = registry
784            .get_plugin_with_content("Main", content)
785            .expect("should detect Java");
786        let entities = plugin.extract_entities(content, "Main");
787        assert!(entities.iter().any(|e| e.name == "Main"));
788    }
789
790    #[test]
791    fn test_detect_go_from_package() {
792        let registry = create_default_registry();
793        let content = "package main\n\nimport \"fmt\"\n\nfunc hello() {\n    fmt.Println(\"hi\")\n}\n";
794        let plugin = registry
795            .get_plugin_with_content("main", content)
796            .expect("should detect Go");
797        let entities = plugin.extract_entities(content, "main");
798        assert!(entities.iter().any(|e| e.name == "hello"));
799    }
800
801    #[test]
802    fn test_detect_rust_from_use_std() {
803        let registry = create_default_registry();
804        let content = "use std::collections::HashMap;\n\nfn process() {\n    let m = HashMap::new();\n}\n";
805        let plugin = registry
806            .get_plugin_with_content("lib", content)
807            .expect("should detect Rust");
808        let entities = plugin.extract_entities(content, "lib");
809        assert!(entities.iter().any(|e| e.name == "process"));
810    }
811
812    #[cfg(feature = "lang-go")]
813    #[test]
814    fn test_go_method_parent_resolves_across_files() {
815        let registry = create_default_registry();
816        let dir = TempDir::new().unwrap();
817        write_file(&dir, "models.go", "package demo\n\ntype Service struct{}\n");
818        write_file(
819            &dir,
820            "methods.go",
821            "package demo\n\nfunc (s *Service) Run() {}\n",
822        );
823
824        let entities = registry.extract_all_entities(
825            dir.path(),
826            &["models.go".to_string(), "methods.go".to_string()],
827        );
828        let service = entities
829            .iter()
830            .find(|e| e.name == "Service" && e.file_path == "models.go")
831            .expect("Service type should be extracted");
832        let run = entities
833            .iter()
834            .find(|e| e.name == "Run" && e.file_path == "methods.go")
835            .expect("Run method should be extracted");
836
837        assert_eq!(run.parent_id.as_deref(), Some(service.id.as_str()));
838        assert_eq!(run.id, format!("{}::Run", service.id));
839    }
840
841    #[cfg(feature = "lang-go")]
842    #[test]
843    fn test_go_method_parent_resolution_is_package_directory_scoped() {
844        let registry = create_default_registry();
845        let dir = TempDir::new().unwrap();
846        write_file(&dir, "alpha/models.go", "package demo\n\ntype Service struct{}\n");
847        write_file(
848            &dir,
849            "alpha/methods.go",
850            "package demo\n\nfunc (s *Service) Run() {}\n",
851        );
852        write_file(&dir, "beta/models.go", "package demo\n\ntype Service struct{}\n");
853        write_file(
854            &dir,
855            "beta/methods.go",
856            "package demo\n\nfunc (s *Service) Run() {}\n",
857        );
858
859        let entities = registry.extract_all_entities(
860            dir.path(),
861            &[
862                "alpha/models.go".to_string(),
863                "alpha/methods.go".to_string(),
864                "beta/models.go".to_string(),
865                "beta/methods.go".to_string(),
866            ],
867        );
868
869        let alpha_service = entities
870            .iter()
871            .find(|e| e.name == "Service" && e.file_path == "alpha/models.go")
872            .expect("alpha Service type should be extracted");
873        let beta_service = entities
874            .iter()
875            .find(|e| e.name == "Service" && e.file_path == "beta/models.go")
876            .expect("beta Service type should be extracted");
877        let alpha_run = entities
878            .iter()
879            .find(|e| e.name == "Run" && e.file_path == "alpha/methods.go")
880            .expect("alpha Run method should be extracted");
881        let beta_run = entities
882            .iter()
883            .find(|e| e.name == "Run" && e.file_path == "beta/methods.go")
884            .expect("beta Run method should be extracted");
885
886        assert_eq!(alpha_run.parent_id.as_deref(), Some(alpha_service.id.as_str()));
887        assert_eq!(beta_run.parent_id.as_deref(), Some(beta_service.id.as_str()));
888    }
889
890    #[test]
891    fn test_extension_takes_priority_over_heuristics() {
892        let registry = create_default_registry();
893        // Content looks like PHP but file has .py extension
894        let content = "<?php\nclass Foo {}\n";
895        let plugin = registry
896            .get_plugin_with_content("script.py", content)
897            .expect("should use Python parser");
898        assert_eq!(plugin.id(), "code"); // Python uses code plugin, not PHP
899    }
900
901    #[test]
902    fn test_load_semrc_parses_test_dirs() {
903        let dir = TempDir::new().unwrap();
904        write_file(&dir, ".semrc", "test-dirs = e2e-tests, smoke, qa\n");
905        let mut registry = create_default_registry();
906        registry.load_semrc(dir.path());
907        assert_eq!(registry.custom_test_dirs, vec!["e2e-tests", "smoke", "qa"]);
908    }
909
910    #[test]
911    fn test_load_semrc_test_dirs_with_extension_mappings() {
912        let dir = TempDir::new().unwrap();
913        write_file(&dir, ".semrc", ".inc = php\ntest-dirs = custom-tests\n.xyz = cpp\n");
914        let mut registry = create_default_registry();
915        registry.load_semrc(dir.path());
916        assert_eq!(registry.custom_test_dirs, vec!["custom-tests"]);
917        assert!(registry.get_plugin("foo.inc").is_some());
918        assert!(registry.get_plugin("bar.xyz").is_some());
919    }
920
921    #[test]
922    fn test_load_semrc_skips_empty_test_dirs() {
923        let dir = TempDir::new().unwrap();
924        write_file(&dir, ".semrc", "test-dirs = , ,  \n");
925        let mut registry = create_default_registry();
926        registry.load_semrc(dir.path());
927        assert!(registry.custom_test_dirs.is_empty());
928    }
929
930    #[test]
931    fn test_load_semrc_no_test_dirs_leaves_empty() {
932        let dir = TempDir::new().unwrap();
933        write_file(&dir, ".semrc", ".inc = php\n");
934        let mut registry = create_default_registry();
935        registry.load_semrc(dir.path());
936        assert!(registry.custom_test_dirs.is_empty());
937    }
938
939    #[test]
940    fn test_custom_extension_mapping_extracts_entities() {
941        let mut registry = create_default_registry();
942        registry.add_extension_mapping(".mypy", "python");
943
944        let content = "def hello():\n    print(\"hello world\")\n\nclass Calculator:\n    def multiply(self, a, b):\n        return a * b\n";
945        let entities = registry.extract_entities("utils.mypy", content);
946
947        assert!(!entities.is_empty(), "Should extract entities via custom mapping");
948        assert!(entities.iter().any(|e| e.name == "hello"), "Should find hello function");
949        assert!(entities.iter().any(|e| e.name == "Calculator"), "Should find Calculator class");
950        assert!(entities.iter().any(|e| e.name == "multiply"), "Should find multiply method");
951
952        // File path should preserve the original extension
953        for entity in &entities {
954            assert_eq!(entity.file_path, "utils.mypy", "Entity file_path should use original extension");
955            assert!(entity.id.starts_with("utils.mypy::"), "Entity ID should use original file path");
956        }
957    }
958}