Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::Deserialize;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
/// Embedded TOML grammar in `.sublime-syntax` form (syntect doesn't include one).
/// The file contents are baked into the binary at compile time via `include_str!`.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded Git Rebase Todo grammar, used for the `git-rebase-todo` file
/// shown during an interactive rebase.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar ignore files
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");
36
/// Registry of all available syntax grammars.
///
/// This struct holds the compiled syntax set and provides lookup methods.
/// Most lookups are pure in-memory operations; use `GrammarLoader` for loading
/// grammars. Two exceptions are visible in this file: `with_additional_grammars`
/// reads grammar files from disk, and `find_syntax_for_file` may fall back to
/// syntect's file-based detection.
pub struct GrammarRegistry {
    /// Combined syntax set (built-in + embedded + user grammars)
    syntax_set: Arc<SyntaxSet>,
    /// Extension -> scope name mapping for user grammars (takes priority
    /// over syntect's own extension lookup)
    user_extensions: HashMap<String, String>,
    /// Filename -> scope name mapping for dotfiles and special files
    filename_scopes: HashMap<String, String>,
    /// Dynamically loaded grammar files as `(language, path, extensions)` tuples,
    /// kept so they can be re-read when the syntax set is rebuilt with more grammars
    loaded_grammar_paths: Vec<(String, PathBuf, Vec<String>)>,
}
51
52impl GrammarRegistry {
53    /// Create a new GrammarRegistry from pre-built components.
54    ///
55    /// This is typically called by `GrammarLoader` implementations after
56    /// loading grammars from various sources.
57    pub fn new(
58        syntax_set: SyntaxSet,
59        user_extensions: HashMap<String, String>,
60        filename_scopes: HashMap<String, String>,
61    ) -> Self {
62        Self {
63            syntax_set: Arc::new(syntax_set),
64            user_extensions,
65            filename_scopes,
66            loaded_grammar_paths: Vec::new(),
67        }
68    }
69
70    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
71    pub fn empty() -> Arc<Self> {
72        let mut builder = SyntaxSetBuilder::new();
73        builder.add_plain_text_syntax();
74        Arc::new(Self {
75            syntax_set: Arc::new(builder.build()),
76            user_extensions: HashMap::new(),
77            filename_scopes: HashMap::new(),
78            loaded_grammar_paths: Vec::new(),
79        })
80    }
81
82    /// Build the default filename -> scope mappings for dotfiles and special files.
83    pub fn build_filename_scopes() -> HashMap<String, String> {
84        let mut map = HashMap::new();
85
86        // Shell configuration files -> Bash/Shell script scope
87        let shell_scope = "source.shell.bash".to_string();
88        for filename in [
89            ".zshrc",
90            ".zprofile",
91            ".zshenv",
92            ".zlogin",
93            ".zlogout",
94            ".bash_aliases",
95            // .bashrc and .bash_profile are already recognized by syntect
96            // Common shell script files without extensions
97            "PKGBUILD",
98            "APKBUILD",
99        ] {
100            map.insert(filename.to_string(), shell_scope.clone());
101        }
102
103        // Git rebase todo files
104        let git_rebase_scope = "source.git-rebase-todo".to_string();
105        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
106
107        // Git commit message files
108        let git_commit_scope = "source.git-commit".to_string();
109        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
110            map.insert(filename.to_string(), git_commit_scope.clone());
111        }
112
113        // Gitignore and similar files
114        let gitignore_scope = "source.gitignore".to_string();
115        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
116            map.insert(filename.to_string(), gitignore_scope.clone());
117        }
118
119        // Git config files
120        let gitconfig_scope = "source.gitconfig".to_string();
121        for filename in [".gitconfig", ".gitmodules"] {
122            map.insert(filename.to_string(), gitconfig_scope.clone());
123        }
124
125        // Git attributes files
126        let gitattributes_scope = "source.gitattributes".to_string();
127        map.insert(".gitattributes".to_string(), gitattributes_scope);
128
129        map
130    }
131
132    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
133    pub fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
134        // TOML grammar
135        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
136            Ok(syntax) => {
137                builder.add(syntax);
138                tracing::debug!("Loaded embedded TOML grammar");
139            }
140            Err(e) => {
141                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
142            }
143        }
144
145        // Odin grammar
146        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
147            Ok(syntax) => {
148                builder.add(syntax);
149                tracing::debug!("Loaded embedded Odin grammar");
150            }
151            Err(e) => {
152                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
153            }
154        }
155
156        // Zig grammar
157        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
158            Ok(syntax) => {
159                builder.add(syntax);
160                tracing::debug!("Loaded embedded Zig grammar");
161            }
162            Err(e) => {
163                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
164            }
165        }
166
167        // Git Rebase Todo grammar
168        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
169            Ok(syntax) => {
170                builder.add(syntax);
171                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
172            }
173            Err(e) => {
174                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
175            }
176        }
177
178        // Git Commit Message grammar
179        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
180        {
181            Ok(syntax) => {
182                builder.add(syntax);
183                tracing::debug!("Loaded embedded Git Commit Message grammar");
184            }
185            Err(e) => {
186                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
187            }
188        }
189
190        // Gitignore grammar
191        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
192            Ok(syntax) => {
193                builder.add(syntax);
194                tracing::debug!("Loaded embedded Gitignore grammar");
195            }
196            Err(e) => {
197                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
198            }
199        }
200
201        // Git Config grammar
202        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
203            Ok(syntax) => {
204                builder.add(syntax);
205                tracing::debug!("Loaded embedded Git Config grammar");
206            }
207            Err(e) => {
208                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
209            }
210        }
211
212        // Git Attributes grammar
213        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
214            Ok(syntax) => {
215                builder.add(syntax);
216                tracing::debug!("Loaded embedded Git Attributes grammar");
217            }
218            Err(e) => {
219                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
220            }
221        }
222    }
223
224    /// Find syntax for a file by path/extension/filename.
225    ///
226    /// Checks in order:
227    /// 1. User-configured grammar extensions (by scope)
228    /// 2. By extension (includes built-in + embedded grammars)
229    /// 3. By filename (custom dotfile mappings like .zshrc)
230    /// 4. By filename via syntect (handles Makefile, .bashrc, etc.)
231    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
232        // Try extension-based lookup first
233        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
234            // Check user grammars first (higher priority)
235            if let Some(scope) = self.user_extensions.get(ext) {
236                tracing::info!("[SYNTAX DEBUG] find_syntax_for_file: found ext '{}' in user_extensions -> scope '{}'", ext, scope);
237                if let Some(syntax) = syntect::parsing::Scope::new(scope)
238                    .ok()
239                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
240                {
241                    tracing::info!(
242                        "[SYNTAX DEBUG] find_syntax_for_file: found syntax by scope: {}",
243                        syntax.name
244                    );
245                    return Some(syntax);
246                } else {
247                    tracing::info!(
248                        "[SYNTAX DEBUG] find_syntax_for_file: scope '{}' not found in syntax_set",
249                        scope
250                    );
251                }
252            } else {
253                tracing::info!(
254                    "[SYNTAX DEBUG] find_syntax_for_file: ext '{}' NOT in user_extensions",
255                    ext
256                );
257            }
258
259            // Try extension lookup (includes embedded grammars like TOML)
260            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
261                tracing::info!(
262                    "[SYNTAX DEBUG] find_syntax_for_file: found by syntect extension: {}",
263                    syntax.name
264                );
265                return Some(syntax);
266            }
267        }
268
269        // Try filename-based lookup for dotfiles and special files
270        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
271            if let Some(scope) = self.filename_scopes.get(filename) {
272                if let Some(syntax) = syntect::parsing::Scope::new(scope)
273                    .ok()
274                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
275                {
276                    return Some(syntax);
277                }
278            }
279        }
280
281        // Try syntect's full file detection (handles special filenames like Makefile)
282        // This may do I/O for first-line detection, but handles many cases
283        if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
284            return Some(syntax);
285        }
286
287        tracing::info!(
288            "[SYNTAX DEBUG] find_syntax_for_file: no syntax found for {:?}",
289            path
290        );
291        None
292    }
293
294    /// Find syntax for a file, checking user-configured languages first.
295    ///
296    /// This method extends `find_syntax_for_file` by first checking the provided
297    /// languages configuration for filename and extension matches. This allows
298    /// users to configure custom filename patterns (like PKGBUILD for bash) that
299    /// will be respected for syntax highlighting.
300    ///
301    /// Checks in order:
302    /// 1. User-configured language filenames from config
303    /// 2. User-configured language extensions from config
304    /// 3. Falls back to `find_syntax_for_file` for built-in detection
305    pub fn find_syntax_for_file_with_languages(
306        &self,
307        path: &Path,
308        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
309    ) -> Option<&SyntaxReference> {
310        let extension = path.extension().and_then(|e| e.to_str());
311        tracing::info!(
312            "[SYNTAX DEBUG] find_syntax_for_file_with_languages: path={:?}, ext={:?}, languages_config_keys={:?}",
313            path,
314            extension,
315            languages.keys().collect::<Vec<_>>()
316        );
317
318        // Try filename match from languages config first
319        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
320            for (lang_name, lang_config) in languages.iter() {
321                if lang_config.filenames.iter().any(|f| f == filename) {
322                    tracing::info!(
323                        "[SYNTAX DEBUG] filename match: {} -> grammar '{}'",
324                        lang_name,
325                        lang_config.grammar
326                    );
327                    // Found a match - try to find syntax by grammar name
328                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
329                        tracing::info!(
330                            "[SYNTAX DEBUG] found syntax by grammar name: {}",
331                            syntax.name
332                        );
333                        return Some(syntax);
334                    }
335                    // Also try finding by extension if grammar name didn't work
336                    // (some grammars are named differently)
337                    if !lang_config.extensions.is_empty() {
338                        if let Some(ext) = lang_config.extensions.first() {
339                            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
340                                tracing::info!(
341                                    "[SYNTAX DEBUG] found syntax by extension fallback: {}",
342                                    syntax.name
343                                );
344                                return Some(syntax);
345                            }
346                        }
347                    }
348                }
349            }
350        }
351
352        // Try extension match from languages config
353        if let Some(extension) = extension {
354            for (lang_name, lang_config) in languages.iter() {
355                if lang_config.extensions.iter().any(|ext| ext == extension) {
356                    tracing::info!(
357                        "[SYNTAX DEBUG] extension match in config: ext={}, lang={}, grammar='{}'",
358                        extension,
359                        lang_name,
360                        lang_config.grammar
361                    );
362                    // Found a match - try to find syntax by grammar name
363                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
364                        tracing::info!(
365                            "[SYNTAX DEBUG] found syntax by grammar name: {}",
366                            syntax.name
367                        );
368                        return Some(syntax);
369                    } else {
370                        tracing::info!(
371                            "[SYNTAX DEBUG] grammar name '{}' not found in registry",
372                            lang_config.grammar
373                        );
374                    }
375                }
376            }
377        }
378
379        // Fall back to built-in detection
380        tracing::info!("[SYNTAX DEBUG] falling back to find_syntax_for_file");
381        let result = self.find_syntax_for_file(path);
382        tracing::info!(
383            "[SYNTAX DEBUG] find_syntax_for_file result: {:?}",
384            result.map(|s| &s.name)
385        );
386        result
387    }
388
    /// Find syntax by first line content (shebang, mode line, etc.)
    ///
    /// Use this when you have the file content but path-based detection failed.
    /// Delegates directly to the syntax set's first-line matching; returns
    /// `None` when no syntax matches `first_line`.
    pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
        self.syntax_set.find_syntax_by_first_line(first_line)
    }
395
396    /// Find syntax by scope name
397    pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
398        let scope = syntect::parsing::Scope::new(scope).ok()?;
399        self.syntax_set.find_syntax_by_scope(scope)
400    }
401
402    /// Find syntax by name (case-insensitive)
403    ///
404    /// This allows config files to use lowercase grammar names like "go" while
405    /// matching syntect's actual names like "Go".
406    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
407        // Try exact match first
408        if let Some(syntax) = self.syntax_set.find_syntax_by_name(name) {
409            return Some(syntax);
410        }
411        // Fall back to case-insensitive match
412        let name_lower = name.to_lowercase();
413        self.syntax_set
414            .syntaxes()
415            .iter()
416            .find(|s| s.name.to_lowercase() == name_lower)
417    }
418
    /// Get the underlying syntax set.
    ///
    /// Returns a borrow of the shared `Arc`; no reference count is touched.
    /// Use `syntax_set_arc` when an owned handle is needed.
    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
        &self.syntax_set
    }
423
    /// Get a clone of the Arc for sharing.
    ///
    /// Only the reference count is incremented; the returned handle points at
    /// the same `SyntaxSet` as the registry.
    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
        Arc::clone(&self.syntax_set)
    }
428
429    /// List all available syntax names
430    pub fn available_syntaxes(&self) -> Vec<&str> {
431        self.syntax_set
432            .syntaxes()
433            .iter()
434            .map(|s| s.name.as_str())
435            .collect()
436    }
437
438    /// Debug helper: get user extensions as a string for logging
439    pub fn user_extensions_debug(&self) -> String {
440        format!("{:?}", self.user_extensions.keys().collect::<Vec<_>>())
441    }
442
443    /// Check if a syntax is available for an extension
444    pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
445        if self.user_extensions.contains_key(ext) {
446            return true;
447        }
448
449        // Check built-in syntaxes
450        let dummy_path = PathBuf::from(format!("file.{}", ext));
451        self.syntax_set
452            .find_syntax_for_file(&dummy_path)
453            .ok()
454            .flatten()
455            .is_some()
456    }
457
    /// Get the user extensions mapping (extension -> scope name).
    ///
    /// These are the entries consulted first by `find_syntax_for_file`.
    pub fn user_extensions(&self) -> &HashMap<String, String> {
        &self.user_extensions
    }
462
    /// Get the filename scopes mapping (filename -> scope name).
    ///
    /// Keys are exact file names such as `.zshrc`; see `build_filename_scopes`
    /// for the default table.
    pub fn filename_scopes(&self) -> &HashMap<String, String> {
        &self.filename_scopes
    }
467
468    /// Create a new registry with additional grammar files
469    ///
470    /// This builds a new GrammarRegistry that includes all grammars from
471    /// the base registry plus the additional grammars specified.
472    ///
473    /// # Arguments
474    /// * `base` - The base registry to extend
475    /// * `additional` - List of (language, path, extensions) tuples for new grammars
476    ///
477    /// # Returns
478    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
479    pub fn with_additional_grammars(
480        base: &GrammarRegistry,
481        additional: &[(String, PathBuf, Vec<String>)],
482    ) -> Option<Self> {
483        tracing::info!(
484            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars, base has {} user_extensions, {} previously loaded grammars",
485            additional.len(),
486            base.user_extensions.len(),
487            base.loaded_grammar_paths.len()
488        );
489
490        // Start with defaults and embedded grammars (same as Default impl)
491        let defaults = SyntaxSet::load_defaults_newlines();
492        let mut builder = defaults.into_builder();
493        Self::add_embedded_grammars(&mut builder);
494
495        // Start fresh with user extensions - we'll rebuild from loaded grammars
496        let mut user_extensions = HashMap::new();
497
498        // Track all loaded grammar paths (existing + new)
499        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
500
501        // First, reload all previously loaded grammars from base
502        for (language, path, extensions) in &base.loaded_grammar_paths {
503            tracing::info!(
504                "[SYNTAX DEBUG] reloading existing grammar: lang='{}', path={:?}",
505                language,
506                path
507            );
508            match Self::load_grammar_file(path) {
509                Ok(syntax) => {
510                    let scope = syntax.scope.to_string();
511                    builder.add(syntax);
512                    for ext in extensions {
513                        user_extensions.insert(ext.clone(), scope.clone());
514                    }
515                }
516                Err(e) => {
517                    tracing::warn!(
518                        "Failed to reload grammar for '{}' from {:?}: {}",
519                        language,
520                        path,
521                        e
522                    );
523                }
524            }
525        }
526
527        // Add each new grammar
528        for (language, path, extensions) in additional {
529            tracing::info!(
530                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
531                language,
532                path,
533                extensions
534            );
535            match Self::load_grammar_file(path) {
536                Ok(syntax) => {
537                    let scope = syntax.scope.to_string();
538                    tracing::info!(
539                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
540                        syntax.name,
541                        scope
542                    );
543                    builder.add(syntax);
544                    tracing::info!(
545                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
546                        language,
547                        path,
548                        extensions
549                    );
550                    // Register extensions for this grammar
551                    for ext in extensions {
552                        user_extensions.insert(ext.clone(), scope.clone());
553                    }
554                    // Track this grammar path for future reloads
555                    loaded_grammar_paths.push((language.clone(), path.clone(), extensions.clone()));
556                }
557                Err(e) => {
558                    tracing::warn!(
559                        "Failed to load grammar for '{}' from {:?}: {}",
560                        language,
561                        path,
562                        e
563                    );
564                }
565            }
566        }
567
568        Some(Self {
569            syntax_set: Arc::new(builder.build()),
570            user_extensions,
571            filename_scopes: base.filename_scopes.clone(),
572            loaded_grammar_paths,
573        })
574    }
575
576    /// Load a grammar file from disk
577    ///
578    /// Only Sublime Text (.sublime-syntax) format is supported.
579    /// TextMate (.tmLanguage) grammars use a completely different format
580    /// and cannot be loaded by syntect's yaml-load feature.
581    fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
582        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
583
584        match ext {
585            "sublime-syntax" => {
586                let content = std::fs::read_to_string(path)
587                    .map_err(|e| format!("Failed to read file: {}", e))?;
588                SyntaxDefinition::load_from_str(
589                    &content,
590                    true,
591                    path.file_stem().and_then(|s| s.to_str()),
592                )
593                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
594            }
595            _ => Err(format!(
596                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
597                ext
598            )),
599        }
600    }
601}
602
603impl Default for GrammarRegistry {
604    fn default() -> Self {
605        // Create with defaults and embedded grammars only (no user grammars)
606        let defaults = SyntaxSet::load_defaults_newlines();
607        let mut builder = defaults.into_builder();
608        Self::add_embedded_grammars(&mut builder);
609        let syntax_set = builder.build();
610        let filename_scopes = Self::build_filename_scopes();
611
612        Self::new(syntax_set, HashMap::new(), filename_scopes)
613    }
614}
615
616// VSCode package.json structures for parsing grammar manifests
617
/// Top level of a VSCode extension `package.json`, reduced to the part used
/// for grammar discovery. Other keys are not declared here.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The `contributes` section; `None` when the manifest has none.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
623
/// The `contributes` section of a manifest: declared languages and grammars.
/// Either list defaults to empty when its key is missing.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations (id + file extensions).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations (language, scope name, grammar file path).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
631
/// One entry of `contributes.languages`.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required key).
    /// NOTE(review): presumably matched against `GrammarContribution::language` - confirm in the loader.
    pub id: String,
    /// File extensions declared for the language; empty when the key is missing.
    /// NOTE(review): VSCode manifests usually write these with a leading dot - confirm how callers normalize them.
    #[serde(default)]
    pub extensions: Vec<String>,
}
638
/// One entry of `contributes.grammars`. All three keys are required.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Identifier of the language this grammar belongs to.
    pub language: String,
    /// Scope name of the grammar; read from the manifest key `scopeName`.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Location of the grammar file as written in the manifest.
    /// NOTE(review): presumably relative to the manifest's directory - confirm in the loader.
    pub path: String,
}
646
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `filename` resolves to a syntax whose name mentions bash or shell.
    fn assert_detected_as_shell(registry: &GrammarRegistry, filename: &str) {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));

        let name = syntax.name.to_lowercase();
        assert!(
            name.contains("bash") || name.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }

    #[test]
    fn test_empty_registry() {
        let registry = GrammarRegistry::empty();
        // Should have at least plain text
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_default_registry() {
        let registry = GrammarRegistry::default();
        // Should have built-in syntaxes
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_for_common_extensions() {
        let registry = GrammarRegistry::default();

        // Test common extensions that syntect should support
        let test_cases = [
            ("test.py", true),
            ("test.rs", true),
            ("test.js", true),
            ("test.json", true),
            ("test.md", true),
            ("test.html", true),
            ("test.css", true),
            ("test.unknown_extension_xyz", false),
        ];

        for (filename, should_exist) in test_cases {
            let path = Path::new(filename);
            let result = registry.find_syntax_for_file(path);
            assert_eq!(
                result.is_some(),
                should_exist,
                "Expected {:?} for {}",
                should_exist,
                filename
            );
        }
    }

    #[test]
    fn test_syntax_set_arc() {
        let registry = GrammarRegistry::default();
        let arc1 = registry.syntax_set_arc();
        let arc2 = registry.syntax_set_arc();
        // Both should point to the same data
        assert!(Arc::ptr_eq(&arc1, &arc2));
    }

    #[test]
    fn test_shell_dotfiles_detection() {
        let registry = GrammarRegistry::default();

        // Every shell dotfile registered by `build_filename_scopes` should be
        // detected as a shell script.
        for filename in [
            ".zshrc",
            ".zprofile",
            ".zshenv",
            ".zlogin",
            ".zlogout",
            ".bash_aliases",
        ] {
            assert_detected_as_shell(&registry, filename);
        }
    }

    #[test]
    fn test_pkgbuild_detection() {
        let registry = GrammarRegistry::default();

        // PKGBUILD and APKBUILD should be detected as shell scripts
        for filename in ["PKGBUILD", "APKBUILD"] {
            assert_detected_as_shell(&registry, filename);
        }
    }
}
754}