Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::Deserialize;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
// Every constant below embeds the text of a `.sublime-syntax` file at compile
// time via `include_str!`; `GrammarRegistry::add_embedded_grammars` parses them.

/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
/// (the `git-rebase-todo` file is mapped to it by filename)
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
/// (.dockerignore, .npmignore and .hgignore are mapped to the same scope)
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar (syntect doesn't include one)
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");
44
45/// Registry of all available TextMate grammars.
46///
47/// This struct holds the compiled syntax set and provides lookup methods.
48/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
49impl std::fmt::Debug for GrammarRegistry {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        f.debug_struct("GrammarRegistry")
52            .field("syntax_count", &self.syntax_set.syntaxes().len())
53            .finish()
54    }
55}
56
57pub struct GrammarRegistry {
58    /// Combined syntax set (built-in + embedded + user grammars)
59    syntax_set: Arc<SyntaxSet>,
60    /// Extension -> scope name mapping for user grammars (takes priority)
61    user_extensions: HashMap<String, String>,
62    /// Filename -> scope name mapping for dotfiles and special files
63    filename_scopes: HashMap<String, String>,
64    /// Paths to dynamically loaded grammar files (for reloading when adding more)
65    loaded_grammar_paths: Vec<(String, PathBuf, Vec<String>)>,
66}
67
68impl GrammarRegistry {
69    /// Create a new GrammarRegistry from pre-built components.
70    ///
71    /// This is typically called by `GrammarLoader` implementations after
72    /// loading grammars from various sources.
73    pub fn new(
74        syntax_set: SyntaxSet,
75        user_extensions: HashMap<String, String>,
76        filename_scopes: HashMap<String, String>,
77    ) -> Self {
78        Self {
79            syntax_set: Arc::new(syntax_set),
80            user_extensions,
81            filename_scopes,
82            loaded_grammar_paths: Vec::new(),
83        }
84    }
85
86    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
87    pub fn empty() -> Arc<Self> {
88        let mut builder = SyntaxSetBuilder::new();
89        builder.add_plain_text_syntax();
90        Arc::new(Self {
91            syntax_set: Arc::new(builder.build()),
92            user_extensions: HashMap::new(),
93            filename_scopes: HashMap::new(),
94            loaded_grammar_paths: Vec::new(),
95        })
96    }
97
98    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
99    ///
100    /// This provides instant syntax highlighting for ~50 common languages
101    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
102    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
103    /// then swap in a full registry built on a background thread.
104    pub fn defaults_only() -> Arc<Self> {
105        let syntax_set = SyntaxSet::load_defaults_newlines();
106        let filename_scopes = Self::build_filename_scopes();
107        Arc::new(Self {
108            syntax_set: Arc::new(syntax_set),
109            user_extensions: HashMap::new(),
110            filename_scopes,
111            loaded_grammar_paths: Vec::new(),
112        })
113    }
114
115    /// Build the default filename -> scope mappings for dotfiles and special files.
116    pub fn build_filename_scopes() -> HashMap<String, String> {
117        let mut map = HashMap::new();
118
119        // Shell configuration files -> Bash/Shell script scope
120        let shell_scope = "source.shell.bash".to_string();
121        for filename in [
122            ".zshrc",
123            ".zprofile",
124            ".zshenv",
125            ".zlogin",
126            ".zlogout",
127            ".bash_aliases",
128            // .bashrc and .bash_profile are already recognized by syntect
129            // Common shell script files without extensions
130            "PKGBUILD",
131            "APKBUILD",
132        ] {
133            map.insert(filename.to_string(), shell_scope.clone());
134        }
135
136        // Git rebase todo files
137        let git_rebase_scope = "source.git-rebase-todo".to_string();
138        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
139
140        // Git commit message files
141        let git_commit_scope = "source.git-commit".to_string();
142        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
143            map.insert(filename.to_string(), git_commit_scope.clone());
144        }
145
146        // Gitignore and similar files
147        let gitignore_scope = "source.gitignore".to_string();
148        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
149            map.insert(filename.to_string(), gitignore_scope.clone());
150        }
151
152        // Git config files
153        let gitconfig_scope = "source.gitconfig".to_string();
154        for filename in [".gitconfig", ".gitmodules"] {
155            map.insert(filename.to_string(), gitconfig_scope.clone());
156        }
157
158        // Git attributes files
159        let gitattributes_scope = "source.gitattributes".to_string();
160        map.insert(".gitattributes".to_string(), gitattributes_scope);
161
162        map
163    }
164
165    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
166    pub fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
167        // TOML grammar
168        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
169            Ok(syntax) => {
170                builder.add(syntax);
171                tracing::debug!("Loaded embedded TOML grammar");
172            }
173            Err(e) => {
174                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
175            }
176        }
177
178        // Odin grammar
179        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
180            Ok(syntax) => {
181                builder.add(syntax);
182                tracing::debug!("Loaded embedded Odin grammar");
183            }
184            Err(e) => {
185                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
186            }
187        }
188
189        // Zig grammar
190        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
191            Ok(syntax) => {
192                builder.add(syntax);
193                tracing::debug!("Loaded embedded Zig grammar");
194            }
195            Err(e) => {
196                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
197            }
198        }
199
200        // Git Rebase Todo grammar
201        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
202            Ok(syntax) => {
203                builder.add(syntax);
204                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
205            }
206            Err(e) => {
207                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
208            }
209        }
210
211        // Git Commit Message grammar
212        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
213        {
214            Ok(syntax) => {
215                builder.add(syntax);
216                tracing::debug!("Loaded embedded Git Commit Message grammar");
217            }
218            Err(e) => {
219                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
220            }
221        }
222
223        // Gitignore grammar
224        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
225            Ok(syntax) => {
226                builder.add(syntax);
227                tracing::debug!("Loaded embedded Gitignore grammar");
228            }
229            Err(e) => {
230                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
231            }
232        }
233
234        // Git Config grammar
235        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
236            Ok(syntax) => {
237                builder.add(syntax);
238                tracing::debug!("Loaded embedded Git Config grammar");
239            }
240            Err(e) => {
241                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
242            }
243        }
244
245        // Git Attributes grammar
246        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
247            Ok(syntax) => {
248                builder.add(syntax);
249                tracing::debug!("Loaded embedded Git Attributes grammar");
250            }
251            Err(e) => {
252                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
253            }
254        }
255
256        // Typst grammar
257        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
258            Ok(syntax) => {
259                builder.add(syntax);
260                tracing::debug!("Loaded embedded Typst grammar");
261            }
262            Err(e) => {
263                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
264            }
265        }
266    }
267
268    /// Find syntax for a file by path/extension/filename.
269    ///
270    /// Checks in order:
271    /// 1. User-configured grammar extensions (by scope)
272    /// 2. By extension (includes built-in + embedded grammars)
273    /// 3. By filename (custom dotfile mappings like .zshrc)
274    /// 4. By filename via syntect (handles Makefile, .bashrc, etc.)
275    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
276        // Try extension-based lookup first
277        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
278            // Check user grammars first (higher priority)
279            if let Some(scope) = self.user_extensions.get(ext) {
280                tracing::info!("[SYNTAX DEBUG] find_syntax_for_file: found ext '{}' in user_extensions -> scope '{}'", ext, scope);
281                if let Some(syntax) = syntect::parsing::Scope::new(scope)
282                    .ok()
283                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
284                {
285                    tracing::info!(
286                        "[SYNTAX DEBUG] find_syntax_for_file: found syntax by scope: {}",
287                        syntax.name
288                    );
289                    return Some(syntax);
290                } else {
291                    tracing::info!(
292                        "[SYNTAX DEBUG] find_syntax_for_file: scope '{}' not found in syntax_set",
293                        scope
294                    );
295                }
296            } else {
297                tracing::info!(
298                    "[SYNTAX DEBUG] find_syntax_for_file: ext '{}' NOT in user_extensions",
299                    ext
300                );
301            }
302
303            // Try extension lookup (includes embedded grammars like TOML)
304            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
305                tracing::info!(
306                    "[SYNTAX DEBUG] find_syntax_for_file: found by syntect extension: {}",
307                    syntax.name
308                );
309                return Some(syntax);
310            }
311        }
312
313        // Try filename-based lookup for dotfiles and special files
314        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
315            if let Some(scope) = self.filename_scopes.get(filename) {
316                if let Some(syntax) = syntect::parsing::Scope::new(scope)
317                    .ok()
318                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
319                {
320                    return Some(syntax);
321                }
322            }
323        }
324
325        // Try syntect's full file detection (handles special filenames like Makefile)
326        // This may do I/O for first-line detection, but handles many cases
327        if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
328            return Some(syntax);
329        }
330
331        tracing::info!(
332            "[SYNTAX DEBUG] find_syntax_for_file: no syntax found for {:?}",
333            path
334        );
335        None
336    }
337
338    /// Find syntax for a file, checking user-configured languages first.
339    ///
340    /// This method extends `find_syntax_for_file` by first checking the provided
341    /// languages configuration for filename and extension matches. This allows
342    /// users to configure custom filename patterns (like PKGBUILD for bash) that
343    /// will be respected for syntax highlighting.
344    ///
345    /// Checks in order:
346    /// 1. User-configured language filenames from config (exact match)
347    /// 2. User-configured language filenames from config (glob patterns)
348    /// 3. User-configured language extensions from config
349    /// 4. Falls back to `find_syntax_for_file` for built-in detection
350    pub fn find_syntax_for_file_with_languages(
351        &self,
352        path: &Path,
353        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
354    ) -> Option<&SyntaxReference> {
355        let extension = path.extension().and_then(|e| e.to_str());
356        tracing::info!(
357            "[SYNTAX DEBUG] find_syntax_for_file_with_languages: path={:?}, ext={:?}, languages_config_keys={:?}",
358            path,
359            extension,
360            languages.keys().collect::<Vec<_>>()
361        );
362
363        // Try filename match from languages config first (exact then glob)
364        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
365            // First pass: exact matches only (highest priority)
366            for (lang_name, lang_config) in languages.iter() {
367                if lang_config
368                    .filenames
369                    .iter()
370                    .any(|f| !is_glob_pattern(f) && f == filename)
371                {
372                    tracing::info!(
373                        "[SYNTAX DEBUG] filename match: {} -> grammar '{}'",
374                        lang_name,
375                        lang_config.grammar
376                    );
377                    if let Some(syntax) = self.find_syntax_for_lang_config(lang_config) {
378                        return Some(syntax);
379                    }
380                }
381            }
382
383            // Second pass: glob pattern matches
384            // Path patterns (containing `/`) are matched against the full path;
385            // filename-only patterns are matched against just the filename.
386            let path_str = path.to_str().unwrap_or("");
387            for (lang_name, lang_config) in languages.iter() {
388                if lang_config.filenames.iter().any(|f| {
389                    if !is_glob_pattern(f) {
390                        return false;
391                    }
392                    if is_path_pattern(f) {
393                        path_glob_matches(f, path_str)
394                    } else {
395                        filename_glob_matches(f, filename)
396                    }
397                }) {
398                    tracing::info!(
399                        "[SYNTAX DEBUG] filename glob match: {} -> grammar '{}'",
400                        lang_name,
401                        lang_config.grammar
402                    );
403                    if let Some(syntax) = self.find_syntax_for_lang_config(lang_config) {
404                        return Some(syntax);
405                    }
406                }
407            }
408        }
409
410        // Try extension match from languages config
411        if let Some(extension) = extension {
412            for (lang_name, lang_config) in languages.iter() {
413                if lang_config.extensions.iter().any(|ext| ext == extension) {
414                    tracing::info!(
415                        "[SYNTAX DEBUG] extension match in config: ext={}, lang={}, grammar='{}'",
416                        extension,
417                        lang_name,
418                        lang_config.grammar
419                    );
420                    // Found a match - try to find syntax by grammar name
421                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
422                        tracing::info!(
423                            "[SYNTAX DEBUG] found syntax by grammar name: {}",
424                            syntax.name
425                        );
426                        return Some(syntax);
427                    } else {
428                        tracing::info!(
429                            "[SYNTAX DEBUG] grammar name '{}' not found in registry",
430                            lang_config.grammar
431                        );
432                    }
433                }
434            }
435        }
436
437        // Fall back to built-in detection
438        tracing::info!("[SYNTAX DEBUG] falling back to find_syntax_for_file");
439        let result = self.find_syntax_for_file(path);
440        tracing::info!(
441            "[SYNTAX DEBUG] find_syntax_for_file result: {:?}",
442            result.map(|s| &s.name)
443        );
444        result
445    }
446
447    /// Helper: given a language config, find the syntax reference for it.
448    fn find_syntax_for_lang_config(
449        &self,
450        lang_config: &crate::config::LanguageConfig,
451    ) -> Option<&SyntaxReference> {
452        if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
453            tracing::info!(
454                "[SYNTAX DEBUG] found syntax by grammar name: {}",
455                syntax.name
456            );
457            return Some(syntax);
458        }
459        // Also try finding by extension if grammar name didn't work
460        // (some grammars are named differently)
461        if !lang_config.extensions.is_empty() {
462            if let Some(ext) = lang_config.extensions.first() {
463                if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
464                    tracing::info!(
465                        "[SYNTAX DEBUG] found syntax by extension fallback: {}",
466                        syntax.name
467                    );
468                    return Some(syntax);
469                }
470            }
471        }
472        None
473    }
474
475    /// Find syntax by first line content (shebang, mode line, etc.)
476    ///
477    /// Use this when you have the file content but path-based detection failed.
478    pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
479        self.syntax_set.find_syntax_by_first_line(first_line)
480    }
481
482    /// Find syntax by scope name
483    pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
484        let scope = syntect::parsing::Scope::new(scope).ok()?;
485        self.syntax_set.find_syntax_by_scope(scope)
486    }
487
488    /// Find syntax by name (case-insensitive)
489    ///
490    /// This allows config files to use lowercase grammar names like "go" while
491    /// matching syntect's actual names like "Go".
492    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
493        // Try exact match first
494        if let Some(syntax) = self.syntax_set.find_syntax_by_name(name) {
495            return Some(syntax);
496        }
497        // Fall back to case-insensitive match
498        let name_lower = name.to_lowercase();
499        self.syntax_set
500            .syntaxes()
501            .iter()
502            .find(|s| s.name.to_lowercase() == name_lower)
503    }
504
505    /// Get the underlying syntax set
506    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
507        &self.syntax_set
508    }
509
510    /// Get a clone of the Arc for sharing
511    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
512        Arc::clone(&self.syntax_set)
513    }
514
515    /// List all available syntax names
516    pub fn available_syntaxes(&self) -> Vec<&str> {
517        self.syntax_set
518            .syntaxes()
519            .iter()
520            .map(|s| s.name.as_str())
521            .collect()
522    }
523
524    /// Debug helper: get user extensions as a string for logging
525    pub fn user_extensions_debug(&self) -> String {
526        format!("{:?}", self.user_extensions.keys().collect::<Vec<_>>())
527    }
528
529    /// Check if a syntax is available for an extension
530    pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
531        if self.user_extensions.contains_key(ext) {
532            return true;
533        }
534
535        // Check built-in syntaxes
536        let dummy_path = PathBuf::from(format!("file.{}", ext));
537        self.syntax_set
538            .find_syntax_for_file(&dummy_path)
539            .ok()
540            .flatten()
541            .is_some()
542    }
543
544    /// Get the user extensions mapping (extension -> scope name)
545    pub fn user_extensions(&self) -> &HashMap<String, String> {
546        &self.user_extensions
547    }
548
549    /// Get the filename scopes mapping (filename -> scope name)
550    pub fn filename_scopes(&self) -> &HashMap<String, String> {
551        &self.filename_scopes
552    }
553
554    /// Create a new registry with additional grammar files
555    ///
556    /// This builds a new GrammarRegistry that includes all grammars from
557    /// the base registry plus the additional grammars specified.
558    ///
559    /// # Arguments
560    /// * `base` - The base registry to extend
561    /// * `additional` - List of (language, path, extensions) tuples for new grammars
562    ///
563    /// # Returns
564    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
565    pub fn with_additional_grammars(
566        base: &GrammarRegistry,
567        additional: &[(String, PathBuf, Vec<String>)],
568    ) -> Option<Self> {
569        tracing::info!(
570            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars, base has {} user_extensions, {} previously loaded grammars",
571            additional.len(),
572            base.user_extensions.len(),
573            base.loaded_grammar_paths.len()
574        );
575
576        // Start with defaults and embedded grammars (same as Default impl)
577        let defaults = SyntaxSet::load_defaults_newlines();
578        let mut builder = defaults.into_builder();
579        Self::add_embedded_grammars(&mut builder);
580
581        // Start fresh with user extensions - we'll rebuild from loaded grammars
582        let mut user_extensions = HashMap::new();
583
584        // Track all loaded grammar paths (existing + new)
585        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
586
587        // First, reload all previously loaded grammars from base
588        for (language, path, extensions) in &base.loaded_grammar_paths {
589            tracing::info!(
590                "[SYNTAX DEBUG] reloading existing grammar: lang='{}', path={:?}",
591                language,
592                path
593            );
594            match Self::load_grammar_file(path) {
595                Ok(syntax) => {
596                    let scope = syntax.scope.to_string();
597                    builder.add(syntax);
598                    for ext in extensions {
599                        user_extensions.insert(ext.clone(), scope.clone());
600                    }
601                }
602                Err(e) => {
603                    tracing::warn!(
604                        "Failed to reload grammar for '{}' from {:?}: {}",
605                        language,
606                        path,
607                        e
608                    );
609                }
610            }
611        }
612
613        // Add each new grammar
614        for (language, path, extensions) in additional {
615            tracing::info!(
616                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
617                language,
618                path,
619                extensions
620            );
621            match Self::load_grammar_file(path) {
622                Ok(syntax) => {
623                    let scope = syntax.scope.to_string();
624                    tracing::info!(
625                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
626                        syntax.name,
627                        scope
628                    );
629                    builder.add(syntax);
630                    tracing::info!(
631                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
632                        language,
633                        path,
634                        extensions
635                    );
636                    // Register extensions for this grammar
637                    for ext in extensions {
638                        user_extensions.insert(ext.clone(), scope.clone());
639                    }
640                    // Track this grammar path for future reloads
641                    loaded_grammar_paths.push((language.clone(), path.clone(), extensions.clone()));
642                }
643                Err(e) => {
644                    tracing::warn!(
645                        "Failed to load grammar for '{}' from {:?}: {}",
646                        language,
647                        path,
648                        e
649                    );
650                }
651            }
652        }
653
654        Some(Self {
655            syntax_set: Arc::new(builder.build()),
656            user_extensions,
657            filename_scopes: base.filename_scopes.clone(),
658            loaded_grammar_paths,
659        })
660    }
661
662    /// Load a grammar file from disk
663    ///
664    /// Only Sublime Text (.sublime-syntax) format is supported.
665    /// TextMate (.tmLanguage) grammars use a completely different format
666    /// and cannot be loaded by syntect's yaml-load feature.
667    fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
668        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
669
670        match ext {
671            "sublime-syntax" => {
672                let content = std::fs::read_to_string(path)
673                    .map_err(|e| format!("Failed to read file: {}", e))?;
674                SyntaxDefinition::load_from_str(
675                    &content,
676                    true,
677                    path.file_stem().and_then(|s| s.to_str()),
678                )
679                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
680            }
681            _ => Err(format!(
682                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
683                ext
684            )),
685        }
686    }
687}
688
689impl Default for GrammarRegistry {
690    fn default() -> Self {
691        // Create with defaults and embedded grammars only (no user grammars)
692        let defaults = SyntaxSet::load_defaults_newlines();
693        let mut builder = defaults.into_builder();
694        Self::add_embedded_grammars(&mut builder);
695        let syntax_set = builder.build();
696        let filename_scopes = Self::build_filename_scopes();
697
698        Self::new(syntax_set, HashMap::new(), filename_scopes)
699    }
700}
701
702// VSCode package.json structures for parsing grammar manifests
703
/// The part of a VSCode extension `package.json` that is read when parsing
/// grammar manifests. Only the `contributes` key is modelled here.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The `contributes` section; `None` when the key is missing.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
709
/// The `contributes` section of a package manifest. Both lists default to
/// empty when their key is missing.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations (`languages` key).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations (`grammars` key).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
717
/// One entry of `contributes.languages` in a package manifest.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required).
    pub id: String,
    /// Extensions declared for the language; empty when the key is missing.
    // NOTE(review): presumably stored as written in the manifest (VSCode
    // manifests usually include the leading dot) - confirm at the call site.
    #[serde(default)]
    pub extensions: Vec<String>,
}
724
/// One entry of `contributes.grammars` in a package manifest. All three
/// fields are required for deserialization to succeed.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language this grammar belongs to (`language` key).
    pub language: String,
    /// Scope name of the grammar; spelled `scopeName` in the JSON.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Location of the grammar file as written in the manifest.
    // NOTE(review): presumably relative to the manifest's directory - confirm
    // against the loader that consumes this struct.
    pub path: String,
}
732
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{HighlighterPreference, LanguageConfig};

    /// Builds a `LanguageConfig` for the user-language lookup tests.
    ///
    /// Only the extensions, filename patterns and grammar name differ between
    /// tests; every other field is pinned to the same value for all of them so
    /// the individual tests show just what they are exercising.
    fn language_config(extensions: &[&str], filenames: &[&str], grammar: &str) -> LanguageConfig {
        LanguageConfig {
            extensions: extensions.iter().map(|&ext| ext.to_string()).collect(),
            filenames: filenames.iter().map(|&name| name.to_string()).collect(),
            grammar: grammar.to_string(),
            comment_prefix: Some("#".to_string()),
            auto_indent: true,
            auto_close: None,
            auto_surround: None,
            highlighter: HighlighterPreference::Auto,
            textmate_grammar: None,
            show_whitespace_tabs: true,
            use_tabs: false,
            tab_size: None,
            formatter: None,
            format_on_save: false,
            on_save: vec![],
        }
    }

    /// Asserts that `filename` resolves to a syntax whose name mentions
    /// "bash" or "shell" (case-insensitive).
    ///
    /// Panics with a message naming the file when no syntax is found or when
    /// the syntax found is not a shell one.
    fn assert_detected_as_shell(registry: &GrammarRegistry, filename: &str) {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));

        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }

    #[test]
    fn test_empty_registry() {
        let registry = GrammarRegistry::empty();
        // Should have at least plain text
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_default_registry() {
        let registry = GrammarRegistry::default();
        // Should have built-in syntaxes
        assert!(!registry.available_syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_for_common_extensions() {
        let registry = GrammarRegistry::default();

        // Common extensions that syntect should support, plus one made-up
        // extension that must not resolve to anything.
        let test_cases = [
            ("test.py", true),
            ("test.rs", true),
            ("test.js", true),
            ("test.json", true),
            ("test.md", true),
            ("test.html", true),
            ("test.css", true),
            ("test.unknown_extension_xyz", false),
        ];

        for (filename, should_exist) in test_cases {
            let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
            assert_eq!(
                found, should_exist,
                "Expected {:?} for {}",
                should_exist, filename
            );
        }
    }

    #[test]
    fn test_syntax_set_arc() {
        let registry = GrammarRegistry::default();
        let arc1 = registry.syntax_set_arc();
        let arc2 = registry.syntax_set_arc();
        // Both should point to the same data
        assert!(Arc::ptr_eq(&arc1, &arc2));
    }

    #[test]
    fn test_shell_dotfiles_detection() {
        let registry = GrammarRegistry::default();

        // All these should be detected as shell scripts
        for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
            assert_detected_as_shell(&registry, filename);
        }
    }

    #[test]
    fn test_pkgbuild_detection() {
        let registry = GrammarRegistry::default();

        // PKGBUILD and APKBUILD should be detected as shell scripts
        for filename in ["PKGBUILD", "APKBUILD"] {
            assert_detected_as_shell(&registry, filename);
        }
    }

    #[test]
    fn test_find_syntax_with_glob_filenames() {
        let registry = GrammarRegistry::default();
        let mut languages = HashMap::new();
        languages.insert(
            "shell-configs".to_string(),
            language_config(&["sh"], &["*.conf", "*rc"], "bash"),
        );

        // *.conf should match
        let result =
            registry.find_syntax_for_file_with_languages(Path::new("nftables.conf"), &languages);
        assert!(result.is_some(), "*.conf should match nftables.conf");

        // *rc should match
        let result = registry.find_syntax_for_file_with_languages(Path::new("lfrc"), &languages);
        assert!(result.is_some(), "*rc should match lfrc");

        // An unrelated file does not match our globs, but it may still match
        // via built-in detection, so the result is deliberately not asserted:
        // this only verifies the lookup does not panic.
        let _ = registry.find_syntax_for_file_with_languages(Path::new("randomfile"), &languages);
    }

    #[test]
    fn test_find_syntax_with_path_glob_filenames() {
        let registry = GrammarRegistry::default();
        let mut languages = HashMap::new();
        languages.insert(
            "shell-configs".to_string(),
            language_config(&["sh"], &["/etc/**/rc.*"], "bash"),
        );

        // /etc/**/rc.* should match via full path
        let result =
            registry.find_syntax_for_file_with_languages(Path::new("/etc/rc.conf"), &languages);
        assert!(result.is_some(), "/etc/**/rc.* should match /etc/rc.conf");

        let result = registry
            .find_syntax_for_file_with_languages(Path::new("/etc/init/rc.local"), &languages);
        assert!(
            result.is_some(),
            "/etc/**/rc.* should match /etc/init/rc.local"
        );

        // A different root won't match the path glob, but it may still match
        // via built-in detection, so the result is deliberately not asserted:
        // this only verifies the lookup does not panic.
        let _ = registry.find_syntax_for_file_with_languages(Path::new("/var/rc.conf"), &languages);
    }

    #[test]
    fn test_exact_filename_takes_priority_over_glob() {
        let registry = GrammarRegistry::default();
        let mut languages = HashMap::new();

        // A language with exact filename "lfrc" -> python grammar
        languages.insert(
            "custom-lfrc".to_string(),
            language_config(&[], &["lfrc"], "python"),
        );

        // A language with glob "*rc" -> bash grammar
        languages.insert(
            "rc-files".to_string(),
            language_config(&[], &["*rc"], "bash"),
        );

        // "lfrc" should match the exact rule (python), not the glob (bash)
        let syntax = registry
            .find_syntax_for_file_with_languages(Path::new("lfrc"), &languages)
            .expect("lfrc should resolve to a syntax");
        assert!(
            syntax.name.to_lowercase().contains("python"),
            "exact match should win over glob, got: {}",
            syntax.name
        );
    }
}