Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// A grammar specification: language name, path to grammar file, and associated file extensions.
///
/// Used to pass grammar information between the plugin layer, loader, and registry
/// without relying on anonymous tuples. `GrammarRegistry` stores a list of these
/// in its `loaded_grammar_paths` field.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language identifier (e.g., "elixir")
    pub language: String,
    /// Path to the grammar file (.sublime-syntax)
    pub path: PathBuf,
    /// File extensions to associate with this grammar (e.g., ["ex", "exs"]).
    /// The example is written without a leading dot.
    pub extensions: Vec<String>,
}
30
/// Where a grammar was loaded from.
///
/// Serialized with serde's internally tagged representation: the variant is
/// stored under a `"type"` key using the kebab-case names given by the
/// `rename` attributes below, alongside the variant's own fields.
///
/// The `Display` impl produces a short human-readable label such as
/// `built-in` or `plugin (<plugin>)`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Built-in to Fresh (pre-compiled syntect defaults + embedded grammars)
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Installed from a user grammar directory (~/.config/fresh/grammars/)
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// From a language pack (~/.config/fresh/languages/packages/)
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// From a bundle package (~/.config/fresh/bundles/packages/)
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Registered by a plugin at runtime
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            GrammarSource::BuiltIn => write!(f, "built-in"),
56            GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57            GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58            GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59            GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60        }
61    }
62}
63
/// Information about an available grammar, including its provenance.
///
/// `GrammarRegistry` keeps one of these per grammar in its `grammar_sources`
/// map, keyed by grammar name.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// The grammar name as used in config files (case-insensitive matching)
    pub name: String,
    /// Where this grammar was loaded from
    pub source: GrammarSource,
    /// File extensions associated with this grammar
    pub file_extensions: Vec<String>,
    /// Optional short name alias (e.g., "bash" for "Bourne Again Shell (bash)").
    ///
    /// Omitted from serialized output when `None`, and defaults to `None`
    /// when the field is missing on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
// Embedded grammar sources.
//
// Each constant below is the full text of a `.sublime-syntax` file, compiled
// into the binary with `include_str!` (paths are resolved relative to this
// source file). `GrammarRegistry::add_embedded_grammars` parses them and adds
// them to a `SyntaxSetBuilder`.

/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar (syntect doesn't include one)
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

// The grammars from here on are loaded through the `additional_grammars`
// table in `add_embedded_grammars`.

/// Embedded Dockerfile grammar
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar (also handles .env, .cfg, .editorconfig, etc.)
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL/Terraform grammar
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V language grammar
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Starlark/Bazel grammar
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go Module grammar
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar (Hyprland config)
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar
/// From: https://github.com/SALZKARTOFFEEEL/ahk-sublime-syntax (MIT License)
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
170
171/// Registry of all available TextMate grammars.
172///
173/// This struct holds the compiled syntax set and provides lookup methods.
174/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
175impl std::fmt::Debug for GrammarRegistry {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        f.debug_struct("GrammarRegistry")
178            .field("syntax_count", &self.syntax_set.syntaxes().len())
179            .finish()
180    }
181}
182
183pub struct GrammarRegistry {
184    /// Combined syntax set (built-in + embedded + user grammars)
185    syntax_set: Arc<SyntaxSet>,
186    /// Extension -> scope name mapping for user grammars (takes priority)
187    user_extensions: HashMap<String, String>,
188    /// Filename -> scope name mapping for dotfiles and special files
189    filename_scopes: HashMap<String, String>,
190    /// Paths to dynamically loaded grammar files (for reloading when adding more)
191    loaded_grammar_paths: Vec<GrammarSpec>,
192    /// Provenance info for each grammar (keyed by grammar name)
193    grammar_sources: HashMap<String, GrammarInfo>,
194    /// Short name aliases: lowercase short_name -> full syntect grammar name.
195    /// Provides a deterministic, one-to-one mapping so users can write
196    /// `grammar = "bash"` instead of `grammar = "Bourne Again Shell (bash)"`.
197    aliases: HashMap<String, String>,
198}
199
200impl GrammarRegistry {
201    /// Create a new GrammarRegistry from pre-built components.
202    ///
203    /// This is typically called by `GrammarLoader` implementations after
204    /// loading grammars from various sources.
205    pub fn new(
206        syntax_set: SyntaxSet,
207        user_extensions: HashMap<String, String>,
208        filename_scopes: HashMap<String, String>,
209    ) -> Self {
210        Self::new_with_loaded_paths(
211            syntax_set,
212            user_extensions,
213            filename_scopes,
214            Vec::new(),
215            HashMap::new(),
216        )
217    }
218
219    /// Create a GrammarRegistry with pre-loaded grammar path tracking.
220    ///
221    /// Used by the loader when plugin grammars were included in the initial build,
222    /// so that `loaded_grammar_paths()` reflects what was actually loaded.
223    pub fn new_with_loaded_paths(
224        syntax_set: SyntaxSet,
225        user_extensions: HashMap<String, String>,
226        filename_scopes: HashMap<String, String>,
227        loaded_grammar_paths: Vec<GrammarSpec>,
228        grammar_sources: HashMap<String, GrammarInfo>,
229    ) -> Self {
230        Self {
231            syntax_set: Arc::new(syntax_set),
232            user_extensions,
233            filename_scopes,
234            loaded_grammar_paths,
235            grammar_sources,
236            aliases: HashMap::new(),
237        }
238    }
239
240    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
241    pub fn empty() -> Arc<Self> {
242        let mut builder = SyntaxSetBuilder::new();
243        builder.add_plain_text_syntax();
244        Arc::new(Self {
245            syntax_set: Arc::new(builder.build()),
246            user_extensions: HashMap::new(),
247            filename_scopes: HashMap::new(),
248            loaded_grammar_paths: Vec::new(),
249            grammar_sources: HashMap::new(),
250            aliases: HashMap::new(),
251        })
252    }
253
254    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
255    ///
256    /// This provides instant syntax highlighting for ~50 common languages
257    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
258    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
259    /// then swap in a full registry built on a background thread.
260    pub fn defaults_only() -> Arc<Self> {
261        // Load pre-compiled syntax set (defaults + embedded grammars) from
262        // build-time packdump. This avoids the expensive into_builder() + build()
263        // cycle at runtime (~12s → ~300ms).
264        tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
265        let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
266            concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
267        ))
268        .expect("Failed to load pre-compiled syntax packdump");
269        tracing::info!(
270            "defaults_only: loaded ({} syntaxes)",
271            syntax_set.syntaxes().len()
272        );
273        let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
274        let filename_scopes = Self::build_filename_scopes();
275        let extra_extensions = Self::build_extra_extensions();
276        let mut registry = Self {
277            syntax_set: Arc::new(syntax_set),
278            user_extensions: extra_extensions,
279            filename_scopes,
280            loaded_grammar_paths: Vec::new(),
281            grammar_sources,
282            aliases: HashMap::new(),
283        };
284        registry.populate_built_in_aliases();
285        Arc::new(registry)
286    }
287
288    /// Build extra extension -> scope mappings for extensions not covered by syntect defaults.
289    ///
290    /// These map common file extensions to existing syntect grammar scopes,
291    /// filling gaps where syntect's built-in extension lists are incomplete.
292    pub fn build_extra_extensions() -> HashMap<String, String> {
293        let mut map = HashMap::new();
294
295        // JavaScript variants not in syntect defaults (["js", "htc"])
296        let js_scope = "source.js".to_string();
297        map.insert("cjs".to_string(), js_scope.clone());
298        map.insert("mjs".to_string(), js_scope);
299
300        // Dockerfile variants (e.g. Dockerfile.dev -> .dev extension)
301        // These won't match by extension, handled by filename_scopes and first_line_match
302
303        map
304    }
305
306    /// Build the default filename -> scope mappings for dotfiles and special files.
307    pub fn build_filename_scopes() -> HashMap<String, String> {
308        let mut map = HashMap::new();
309
310        // Shell configuration files -> Bash/Shell script scope
311        let shell_scope = "source.shell.bash".to_string();
312        for filename in [
313            ".zshrc",
314            ".zprofile",
315            ".zshenv",
316            ".zlogin",
317            ".zlogout",
318            ".bash_aliases",
319            // .bashrc and .bash_profile are already recognized by syntect
320            // Common shell script files without extensions
321            "PKGBUILD",
322            "APKBUILD",
323        ] {
324            map.insert(filename.to_string(), shell_scope.clone());
325        }
326
327        // Git rebase todo files
328        let git_rebase_scope = "source.git-rebase-todo".to_string();
329        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
330
331        // Git commit message files
332        let git_commit_scope = "source.git-commit".to_string();
333        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
334            map.insert(filename.to_string(), git_commit_scope.clone());
335        }
336
337        // Gitignore and similar files
338        let gitignore_scope = "source.gitignore".to_string();
339        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
340            map.insert(filename.to_string(), gitignore_scope.clone());
341        }
342
343        // Git config files
344        let gitconfig_scope = "source.gitconfig".to_string();
345        for filename in [".gitconfig", ".gitmodules"] {
346            map.insert(filename.to_string(), gitconfig_scope.clone());
347        }
348
349        // Git attributes files
350        let gitattributes_scope = "source.gitattributes".to_string();
351        map.insert(".gitattributes".to_string(), gitattributes_scope);
352
353        // Jenkinsfile -> Groovy
354        let groovy_scope = "source.groovy".to_string();
355        map.insert("Jenkinsfile".to_string(), groovy_scope);
356
357        // Vagrantfile -> Ruby (syntect already handles this, but be explicit)
358        // Brewfile -> Ruby
359        let ruby_scope = "source.ruby".to_string();
360        map.insert("Brewfile".to_string(), ruby_scope);
361
362        // Dockerfile and variants (exact names; Dockerfile.* handled via prefix check)
363        let dockerfile_scope = "source.dockerfile".to_string();
364        map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
365        map.insert("Containerfile".to_string(), dockerfile_scope.clone());
366        // Common Dockerfile variants
367        map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
368        map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
369        map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
370        map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
371
372        // CMake
373        let cmake_scope = "source.cmake".to_string();
374        map.insert("CMakeLists.txt".to_string(), cmake_scope);
375
376        // Starlark/Bazel
377        let starlark_scope = "source.starlark".to_string();
378        map.insert("BUILD".to_string(), starlark_scope.clone());
379        map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
380        map.insert("WORKSPACE".to_string(), starlark_scope.clone());
381        map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
382        map.insert("Tiltfile".to_string(), starlark_scope);
383
384        // Justfile (various casings)
385        let justfile_scope = "source.justfile".to_string();
386        map.insert("justfile".to_string(), justfile_scope.clone());
387        map.insert("Justfile".to_string(), justfile_scope.clone());
388        map.insert(".justfile".to_string(), justfile_scope);
389
390        // EditorConfig -> INI
391        let ini_scope = "source.ini".to_string();
392        map.insert(".editorconfig".to_string(), ini_scope);
393
394        // Earthfile
395        let earthfile_scope = "source.earthfile".to_string();
396        map.insert("Earthfile".to_string(), earthfile_scope);
397
398        // Hyprlang (Hyprland config files)
399        let hyprlang_scope = "source.hyprlang".to_string();
400        map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
401        map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
402        map.insert("hyprlock.conf".to_string(), hyprlang_scope);
403
404        // go.mod / go.sum
405        let gomod_scope = "source.gomod".to_string();
406        map.insert("go.mod".to_string(), gomod_scope.clone());
407        map.insert("go.sum".to_string(), gomod_scope);
408
409        map
410    }
411
412    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
413    pub fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
414        // TOML grammar
415        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
416            Ok(syntax) => {
417                builder.add(syntax);
418                tracing::debug!("Loaded embedded TOML grammar");
419            }
420            Err(e) => {
421                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
422            }
423        }
424
425        // Odin grammar
426        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
427            Ok(syntax) => {
428                builder.add(syntax);
429                tracing::debug!("Loaded embedded Odin grammar");
430            }
431            Err(e) => {
432                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
433            }
434        }
435
436        // Zig grammar
437        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
438            Ok(syntax) => {
439                builder.add(syntax);
440                tracing::debug!("Loaded embedded Zig grammar");
441            }
442            Err(e) => {
443                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
444            }
445        }
446
447        // Git Rebase Todo grammar
448        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
449            Ok(syntax) => {
450                builder.add(syntax);
451                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
452            }
453            Err(e) => {
454                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
455            }
456        }
457
458        // Git Commit Message grammar
459        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
460        {
461            Ok(syntax) => {
462                builder.add(syntax);
463                tracing::debug!("Loaded embedded Git Commit Message grammar");
464            }
465            Err(e) => {
466                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
467            }
468        }
469
470        // Gitignore grammar
471        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
472            Ok(syntax) => {
473                builder.add(syntax);
474                tracing::debug!("Loaded embedded Gitignore grammar");
475            }
476            Err(e) => {
477                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
478            }
479        }
480
481        // Git Config grammar
482        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
483            Ok(syntax) => {
484                builder.add(syntax);
485                tracing::debug!("Loaded embedded Git Config grammar");
486            }
487            Err(e) => {
488                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
489            }
490        }
491
492        // Git Attributes grammar
493        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
494            Ok(syntax) => {
495                builder.add(syntax);
496                tracing::debug!("Loaded embedded Git Attributes grammar");
497            }
498            Err(e) => {
499                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
500            }
501        }
502
503        // Typst grammar
504        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
505            Ok(syntax) => {
506                builder.add(syntax);
507                tracing::debug!("Loaded embedded Typst grammar");
508            }
509            Err(e) => {
510                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
511            }
512        }
513
514        // Additional embedded grammars for languages not in syntect defaults
515        let additional_grammars: &[(&str, &str)] = &[
516            (DOCKERFILE_GRAMMAR, "Dockerfile"),
517            (INI_GRAMMAR, "INI"),
518            (CMAKE_GRAMMAR, "CMake"),
519            (SCSS_GRAMMAR, "SCSS"),
520            (LESS_GRAMMAR, "LESS"),
521            (POWERSHELL_GRAMMAR, "PowerShell"),
522            (KOTLIN_GRAMMAR, "Kotlin"),
523            (SWIFT_GRAMMAR, "Swift"),
524            (DART_GRAMMAR, "Dart"),
525            (ELIXIR_GRAMMAR, "Elixir"),
526            (FSHARP_GRAMMAR, "FSharp"),
527            (NIX_GRAMMAR, "Nix"),
528            (HCL_GRAMMAR, "HCL"),
529            (PROTOBUF_GRAMMAR, "Protocol Buffers"),
530            (GRAPHQL_GRAMMAR, "GraphQL"),
531            (JULIA_GRAMMAR, "Julia"),
532            (NIM_GRAMMAR, "Nim"),
533            (GLEAM_GRAMMAR, "Gleam"),
534            (VLANG_GRAMMAR, "V"),
535            (SOLIDITY_GRAMMAR, "Solidity"),
536            (KDL_GRAMMAR, "KDL"),
537            (NUSHELL_GRAMMAR, "Nushell"),
538            (STARLARK_GRAMMAR, "Starlark"),
539            (JUSTFILE_GRAMMAR, "Justfile"),
540            (EARTHFILE_GRAMMAR, "Earthfile"),
541            (GOMOD_GRAMMAR, "Go Module"),
542            (VUE_GRAMMAR, "Vue"),
543            (SVELTE_GRAMMAR, "Svelte"),
544            (ASTRO_GRAMMAR, "Astro"),
545            (HYPRLANG_GRAMMAR, "Hyprlang"),
546            (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
547        ];
548
549        for (grammar_str, name) in additional_grammars {
550            match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
551                Ok(syntax) => {
552                    builder.add(syntax);
553                    tracing::debug!("Loaded embedded {} grammar", name);
554                }
555                Err(e) => {
556                    tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
557                }
558            }
559        }
560    }
561
562    /// Find syntax for a file by path/extension/filename.
563    ///
564    /// Checks in order:
565    /// 1. User-configured grammar extensions (by scope)
566    /// 2. By extension (includes built-in + embedded grammars)
567    /// 3. By filename (custom dotfile mappings like .zshrc)
568    /// 4. By filename via syntect (handles Makefile, .bashrc, etc.)
569    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
570        // Try filename-based lookup FIRST for dotfiles, special files, and exact matches
571        // This must come before extension lookup since files like CMakeLists.txt
572        // would otherwise match Plain Text via the .txt extension.
573        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
574            if let Some(scope) = self.filename_scopes.get(filename) {
575                if let Some(syntax) = syntect::parsing::Scope::new(scope)
576                    .ok()
577                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
578                {
579                    return Some(syntax);
580                }
581            }
582        }
583
584        // Try extension-based lookup
585        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
586            // Check user grammars first (higher priority)
587            if let Some(scope) = self.user_extensions.get(ext) {
588                tracing::info!("[SYNTAX DEBUG] find_syntax_for_file: found ext '{}' in user_extensions -> scope '{}'", ext, scope);
589                if let Some(syntax) = syntect::parsing::Scope::new(scope)
590                    .ok()
591                    .and_then(|s| self.syntax_set.find_syntax_by_scope(s))
592                {
593                    tracing::info!(
594                        "[SYNTAX DEBUG] find_syntax_for_file: found syntax by scope: {}",
595                        syntax.name
596                    );
597                    return Some(syntax);
598                } else {
599                    tracing::info!(
600                        "[SYNTAX DEBUG] find_syntax_for_file: scope '{}' not found in syntax_set",
601                        scope
602                    );
603                }
604            } else {
605                tracing::info!(
606                    "[SYNTAX DEBUG] find_syntax_for_file: ext '{}' NOT in user_extensions",
607                    ext
608                );
609            }
610
611            // Try extension lookup (includes embedded grammars like TOML)
612            if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
613                tracing::info!(
614                    "[SYNTAX DEBUG] find_syntax_for_file: found by syntect extension: {}",
615                    syntax.name
616                );
617                return Some(syntax);
618            }
619        }
620
621        // Filename-based lookup already done above (before extension lookup)
622
623        // Try syntect's full file detection (handles special filenames like Makefile)
624        // This may do I/O for first-line detection, but handles many cases
625        if let Ok(Some(syntax)) = self.syntax_set.find_syntax_for_file(path) {
626            return Some(syntax);
627        }
628
629        tracing::info!(
630            "[SYNTAX DEBUG] find_syntax_for_file: no syntax found for {:?}",
631            path
632        );
633        None
634    }
635
636    /// Find syntax for a file, checking user-configured languages first.
637    ///
638    /// This method extends `find_syntax_for_file` by first checking the provided
639    /// languages configuration for filename and extension matches. This allows
640    /// users to configure custom filename patterns (like PKGBUILD for bash) that
641    /// will be respected for syntax highlighting.
642    ///
643    /// Checks in order:
644    /// 1. User-configured language filenames from config (exact match)
645    /// 2. User-configured language filenames from config (glob patterns)
646    /// 3. User-configured language extensions from config
647    /// 4. Falls back to `find_syntax_for_file` for built-in detection
648    pub fn find_syntax_for_file_with_languages(
649        &self,
650        path: &Path,
651        languages: &std::collections::HashMap<String, crate::config::LanguageConfig>,
652    ) -> Option<&SyntaxReference> {
653        let extension = path.extension().and_then(|e| e.to_str());
654        tracing::info!(
655            "[SYNTAX DEBUG] find_syntax_for_file_with_languages: path={:?}, ext={:?}, languages_config_keys={:?}",
656            path,
657            extension,
658            languages.keys().collect::<Vec<_>>()
659        );
660
661        // Track whether any user config rule matched, even if the grammar
662        // couldn't be resolved to a syntect syntax.  When a config match exists
663        // we must NOT fall through to built-in detection, which may map the
664        // extension to a completely different language (e.g. `.fish` → bash).
665        let mut config_matched = false;
666
667        // Try filename match from languages config first (exact then glob)
668        if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
669            // First pass: exact matches only (highest priority)
670            for (lang_name, lang_config) in languages.iter() {
671                if lang_config
672                    .filenames
673                    .iter()
674                    .any(|f| !is_glob_pattern(f) && f == filename)
675                {
676                    tracing::info!(
677                        "[SYNTAX DEBUG] filename match: {} -> grammar '{}'",
678                        lang_name,
679                        lang_config.grammar
680                    );
681                    if let Some(syntax) = self.find_syntax_for_lang_config(lang_config) {
682                        return Some(syntax);
683                    }
684                    config_matched = true;
685                }
686            }
687
688            // Second pass: glob pattern matches
689            // Path patterns (containing `/`) are matched against the full path;
690            // filename-only patterns are matched against just the filename.
691            let path_str = path.to_str().unwrap_or("");
692            for (lang_name, lang_config) in languages.iter() {
693                if lang_config.filenames.iter().any(|f| {
694                    if !is_glob_pattern(f) {
695                        return false;
696                    }
697                    if is_path_pattern(f) {
698                        path_glob_matches(f, path_str)
699                    } else {
700                        filename_glob_matches(f, filename)
701                    }
702                }) {
703                    tracing::info!(
704                        "[SYNTAX DEBUG] filename glob match: {} -> grammar '{}'",
705                        lang_name,
706                        lang_config.grammar
707                    );
708                    if let Some(syntax) = self.find_syntax_for_lang_config(lang_config) {
709                        return Some(syntax);
710                    }
711                    config_matched = true;
712                }
713            }
714        }
715
716        // Try extension match from languages config
717        if let Some(extension) = extension {
718            for (lang_name, lang_config) in languages.iter() {
719                if lang_config.extensions.iter().any(|ext| ext == extension) {
720                    tracing::info!(
721                        "[SYNTAX DEBUG] extension match in config: ext={}, lang={}, grammar='{}'",
722                        extension,
723                        lang_name,
724                        lang_config.grammar
725                    );
726                    // Only try grammar name lookup here (not the extension
727                    // fallback in find_syntax_for_lang_config).  The extension
728                    // fallback would use syntect's built-in mapping which may
729                    // return a wrong language (e.g. .fish → bash).
730                    if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
731                        tracing::info!(
732                            "[SYNTAX DEBUG] found syntax by grammar name: {}",
733                            syntax.name
734                        );
735                        return Some(syntax);
736                    } else {
737                        tracing::info!(
738                            "[SYNTAX DEBUG] grammar name '{}' not found in registry",
739                            lang_config.grammar
740                        );
741                    }
742                    config_matched = true;
743                }
744            }
745        }
746
747        // Fall back to built-in detection only if no user config rule matched.
748        // When a config rule matched but the grammar couldn't be resolved (e.g.
749        // the user configured a language whose grammar isn't in syntect), we
750        // return None to avoid misdetecting the file as a different language.
751        if config_matched {
752            tracing::info!(
753                "[SYNTAX DEBUG] config matched but grammar not resolved; skipping built-in fallback"
754            );
755            return None;
756        }
757        tracing::info!("[SYNTAX DEBUG] falling back to find_syntax_for_file");
758        let result = self.find_syntax_for_file(path);
759        tracing::info!(
760            "[SYNTAX DEBUG] find_syntax_for_file result: {:?}",
761            result.map(|s| &s.name)
762        );
763        result
764    }
765
766    /// Given a language config, find the syntax reference for it.
767    ///
768    /// Tries grammar name first, then falls back to extension-based lookup.
769    /// This handles cases where the grammar name doesn't match syntect's name
770    /// (e.g., grammar `"c_sharp"` maps to syntect syntax `"C#"` via `.cs` extension).
771    pub fn find_syntax_for_lang_config(
772        &self,
773        lang_config: &crate::config::LanguageConfig,
774    ) -> Option<&SyntaxReference> {
775        if let Some(syntax) = self.find_syntax_by_name(&lang_config.grammar) {
776            tracing::info!(
777                "[SYNTAX DEBUG] found syntax by grammar name: {}",
778                syntax.name
779            );
780            return Some(syntax);
781        }
782        // Also try finding by extension if grammar name didn't work
783        // (some grammars are named differently)
784        if !lang_config.extensions.is_empty() {
785            if let Some(ext) = lang_config.extensions.first() {
786                if let Some(syntax) = self.syntax_set.find_syntax_by_extension(ext) {
787                    tracing::info!(
788                        "[SYNTAX DEBUG] found syntax by extension fallback: {}",
789                        syntax.name
790                    );
791                    return Some(syntax);
792                }
793            }
794        }
795        None
796    }
797
798    /// Find syntax by first line content (shebang, mode line, etc.)
799    ///
800    /// Use this when you have the file content but path-based detection failed.
801    pub fn find_syntax_by_first_line(&self, first_line: &str) -> Option<&SyntaxReference> {
802        self.syntax_set.find_syntax_by_first_line(first_line)
803    }
804
805    /// Find syntax by scope name
806    pub fn find_syntax_by_scope(&self, scope: &str) -> Option<&SyntaxReference> {
807        let scope = syntect::parsing::Scope::new(scope).ok()?;
808        self.syntax_set.find_syntax_by_scope(scope)
809    }
810
811    /// Find syntax by name, with alias resolution.
812    ///
813    /// Lookup order:
814    /// 1. Exact match against syntect grammar names
815    /// 2. Case-insensitive match against syntect grammar names
816    /// 3. Alias lookup (short_name -> full grammar name, then exact syntect match)
817    ///
818    /// This allows config files to use `"go"` (case-insensitive match of `"Go"`),
819    /// or `"bash"` (alias for `"Bourne Again Shell (bash)"`).
820    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
821        // 1. Exact match
822        if let Some(syntax) = self.syntax_set.find_syntax_by_name(name) {
823            return Some(syntax);
824        }
825        // 2. Case-insensitive match
826        let name_lower = name.to_lowercase();
827        if let Some(syntax) = self
828            .syntax_set
829            .syntaxes()
830            .iter()
831            .find(|s| s.name.to_lowercase() == name_lower)
832        {
833            return Some(syntax);
834        }
835        // 3. Alias lookup
836        if let Some(full_name) = self.aliases.get(&name_lower) {
837            return self.syntax_set.find_syntax_by_name(full_name);
838        }
839        None
840    }
841
842    // === Alias management ===
843
844    /// Hardcoded short-name aliases for built-in and embedded grammars.
845    ///
846    /// Each entry maps a short name (lowercase) to the exact syntect grammar name.
847    /// Only grammars whose full name differs significantly from a natural short
848    /// form need an entry here. Grammars already short (e.g., "Rust", "Go") are
849    /// reachable via case-insensitive matching and don't need aliases.
850    fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
851        vec![
852            // Syntect built-in grammars with verbose names
853            ("bash", "Bourne Again Shell (bash)"),
854            ("shell", "Bourne Again Shell (bash)"),
855            ("sh", "Bourne Again Shell (bash)"),
856            ("c++", "C++"),
857            ("cpp", "C++"),
858            ("csharp", "C#"),
859            ("objc", "Objective-C"),
860            ("objcpp", "Objective-C++"),
861            ("regex", "Regular Expressions (Python)"),
862            ("regexp", "Regular Expressions (Python)"),
863            // Embedded grammars with multi-word or non-obvious names
864            ("proto", "Protocol Buffers"),
865            ("protobuf", "Protocol Buffers"),
866            ("gomod", "Go Module"),
867            ("git-rebase", "Git Rebase Todo"),
868            ("git-commit", "Git Commit Message"),
869            ("git-config", "Git Config"),
870            ("git-attributes", "Git Attributes"),
871            ("gitignore", "Gitignore"),
872            ("fsharp", "FSharp"),
873            ("f#", "FSharp"),
874            ("terraform", "HCL"),
875            ("tf", "HCL"),
876            ("ts", "TypeScript"),
877            ("js", "JavaScript"),
878            ("py", "Python"),
879            ("rb", "Ruby"),
880            ("rs", "Rust"),
881            ("md", "Markdown"),
882            ("yml", "YAML"),
883            ("dockerfile", "Dockerfile"),
884        ]
885    }
886
887    /// Populate aliases from the built-in table.
888    ///
889    /// Validates that:
890    /// - Each alias target (full name) exists in the syntax set
891    /// - No alias collides (case-insensitive) with an existing grammar full name
892    /// - No duplicate aliases exist
893    pub fn populate_built_in_aliases(&mut self) {
894        for (short, full) in Self::built_in_aliases() {
895            self.register_alias_inner(short, full, true);
896        }
897    }
898
899    /// Register a short-name alias for a grammar.
900    ///
901    /// Returns `true` if the alias was registered, `false` if rejected due to
902    /// collision or missing target. For built-in aliases, collisions panic
903    /// (they indicate a bug). For dynamic aliases, collisions log a warning.
904    pub fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
905        self.register_alias_inner(short_name, full_name, false)
906    }
907
908    fn register_alias_inner(
909        &mut self,
910        short_name: &str,
911        full_name: &str,
912        is_built_in: bool,
913    ) -> bool {
914        let short_lower = short_name.to_lowercase();
915
916        // Validate: target grammar must exist in the syntax set
917        let target_exists = self
918            .syntax_set
919            .syntaxes()
920            .iter()
921            .any(|s| s.name.eq_ignore_ascii_case(full_name));
922        if !target_exists {
923            if is_built_in {
924                // Built-in alias targets should always exist; warn but don't panic
925                // (grammar might have been removed from syntect upstream)
926                tracing::warn!(
927                    "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
928                    short_name, full_name
929                );
930            } else {
931                tracing::warn!(
932                    "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
933                    short_name,
934                    full_name
935                );
936            }
937            return false;
938        }
939
940        // Validate: short name must not collide (case-insensitive) with any grammar full name
941        let collides_with_full_name = self
942            .syntax_set
943            .syntaxes()
944            .iter()
945            .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
946        if collides_with_full_name {
947            // This is actually fine — the short name matches a full name directly,
948            // so find_syntax_by_name's case-insensitive search will find it.
949            // No alias needed.
950            tracing::debug!(
951                "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
952                short_name
953            );
954            return false;
955        }
956
957        // Validate: no duplicate alias (case-insensitive)
958        if let Some(existing_target) = self.aliases.get(&short_lower) {
959            if existing_target.eq_ignore_ascii_case(full_name) {
960                // Same mapping, no-op
961                return true;
962            }
963            let msg = format!(
964                "Alias '{}' already maps to '{}', cannot remap to '{}'",
965                short_name, existing_target, full_name
966            );
967            if is_built_in {
968                panic!("[grammar-alias] Built-in alias collision: {}", msg);
969            } else {
970                tracing::warn!("[grammar-alias] {}", msg);
971                return false;
972            }
973        }
974
975        // Resolve the exact syntect name (preserving original case)
976        let exact_name = self
977            .syntax_set
978            .syntaxes()
979            .iter()
980            .find(|s| s.name.eq_ignore_ascii_case(full_name))
981            .map(|s| s.name.clone())
982            .unwrap();
983
984        self.aliases.insert(short_lower, exact_name);
985        true
986    }
987
988    /// Get the aliases map (short_name -> full grammar name)
989    pub fn aliases(&self) -> &HashMap<String, String> {
990        &self.aliases
991    }
992
993    /// Look up the full grammar name for a short alias.
994    pub fn resolve_alias(&self, short_name: &str) -> Option<&str> {
995        self.aliases
996            .get(&short_name.to_lowercase())
997            .map(|s| s.as_str())
998    }
999
1000    /// Get the underlying syntax set
1001    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1002        &self.syntax_set
1003    }
1004
1005    /// Get a clone of the Arc for sharing
1006    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1007        Arc::clone(&self.syntax_set)
1008    }
1009
1010    /// List all available syntax names
1011    pub fn available_syntaxes(&self) -> Vec<&str> {
1012        self.syntax_set
1013            .syntaxes()
1014            .iter()
1015            .map(|s| s.name.as_str())
1016            .collect()
1017    }
1018
1019    /// List all available grammars with provenance information.
1020    ///
1021    /// Returns a sorted list of `GrammarInfo` entries. Each entry includes
1022    /// the grammar name, where it was loaded from, and associated file extensions.
1023    pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1024        // Build reverse map: full_name -> list of short aliases
1025        let mut reverse_aliases: HashMap<&str, Vec<&str>> = HashMap::new();
1026        for (short, full) in &self.aliases {
1027            reverse_aliases
1028                .entry(full.as_str())
1029                .or_default()
1030                .push(short.as_str());
1031        }
1032
1033        let mut result: Vec<GrammarInfo> = self
1034            .syntax_set
1035            .syntaxes()
1036            .iter()
1037            .filter(|s| s.name != "Plain Text")
1038            .map(|s| {
1039                let name = s.name.clone();
1040                let source = self
1041                    .grammar_sources
1042                    .get(&name)
1043                    .map(|info| info.source.clone())
1044                    .unwrap_or(GrammarSource::BuiltIn);
1045                let file_extensions = s.file_extensions.clone();
1046                // Pick the first (shortest) alias as the canonical short name
1047                let short_name = reverse_aliases.get(name.as_str()).and_then(|aliases| {
1048                    aliases
1049                        .iter()
1050                        .min_by_key(|a| a.len())
1051                        .map(|a| a.to_string())
1052                });
1053                GrammarInfo {
1054                    name,
1055                    source,
1056                    file_extensions,
1057                    short_name,
1058                }
1059            })
1060            .collect();
1061        result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1062        result
1063    }
1064
1065    /// Get the grammar sources map.
1066    pub fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1067        &self.grammar_sources
1068    }
1069
1070    /// Get a mutable reference to the grammar sources map.
1071    pub fn grammar_sources_mut(&mut self) -> &mut HashMap<String, GrammarInfo> {
1072        &mut self.grammar_sources
1073    }
1074
1075    /// Build grammar source info from a pre-compiled syntax set.
1076    ///
1077    /// All grammars in the packdump (syntect defaults + embedded) are tagged as built-in.
1078    pub fn build_grammar_sources_from_syntax_set(
1079        syntax_set: &SyntaxSet,
1080    ) -> HashMap<String, GrammarInfo> {
1081        let mut sources = HashMap::new();
1082        for syntax in syntax_set.syntaxes() {
1083            sources.insert(
1084                syntax.name.clone(),
1085                GrammarInfo {
1086                    name: syntax.name.clone(),
1087                    source: GrammarSource::BuiltIn,
1088                    file_extensions: syntax.file_extensions.clone(),
1089                    short_name: None,
1090                },
1091            );
1092        }
1093        sources
1094    }
1095
1096    /// Debug helper: get user extensions as a string for logging
1097    pub fn user_extensions_debug(&self) -> String {
1098        format!("{:?}", self.user_extensions.keys().collect::<Vec<_>>())
1099    }
1100
1101    /// Check if a syntax is available for an extension
1102    pub fn has_syntax_for_extension(&self, ext: &str) -> bool {
1103        if self.user_extensions.contains_key(ext) {
1104            return true;
1105        }
1106
1107        // Check built-in syntaxes
1108        let dummy_path = PathBuf::from(format!("file.{}", ext));
1109        self.syntax_set
1110            .find_syntax_for_file(&dummy_path)
1111            .ok()
1112            .flatten()
1113            .is_some()
1114    }
1115
1116    /// Get the user extensions mapping (extension -> scope name)
1117    pub fn user_extensions(&self) -> &HashMap<String, String> {
1118        &self.user_extensions
1119    }
1120
1121    /// Get the filename scopes mapping (filename -> scope name)
1122    pub fn filename_scopes(&self) -> &HashMap<String, String> {
1123        &self.filename_scopes
1124    }
1125
1126    /// Get the loaded grammar paths (for deduplication in flush_pending_grammars)
1127    pub fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1128        &self.loaded_grammar_paths
1129    }
1130
1131    /// Create a new registry with additional grammar files
1132    ///
1133    /// This builds a new GrammarRegistry that includes all grammars from
1134    /// the base registry plus the additional grammars specified.
1135    /// Uses the base registry's syntax_set as the builder base, preserving
1136    /// all existing grammars (user grammars, language packs, etc.).
1137    ///
1138    /// # Arguments
1139    /// * `base` - The base registry to extend
1140    /// * `additional` - List of (language, path, extensions) tuples for new grammars
1141    ///
1142    /// # Returns
1143    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
1144    pub fn with_additional_grammars(
1145        base: &GrammarRegistry,
1146        additional: &[GrammarSpec],
1147    ) -> Option<Self> {
1148        tracing::info!(
1149            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1150            additional.len(),
1151            base.syntax_set.syntaxes().len()
1152        );
1153
1154        // Use the base registry's syntax_set as builder base — this preserves
1155        // ALL existing grammars (defaults, embedded, user, language packs)
1156        // without needing to reload them from disk.
1157        let mut builder = (*base.syntax_set).clone().into_builder();
1158
1159        // Preserve existing user extensions and add new ones
1160        let mut user_extensions = base.user_extensions.clone();
1161
1162        // Track loaded grammar paths (existing + new)
1163        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1164
1165        // Preserve existing grammar sources
1166        let mut grammar_sources = base.grammar_sources.clone();
1167
1168        // Add each new grammar
1169        for spec in additional {
1170            tracing::info!(
1171                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1172                spec.language,
1173                spec.path,
1174                spec.extensions
1175            );
1176            match Self::load_grammar_file(&spec.path) {
1177                Ok(syntax) => {
1178                    let scope = syntax.scope.to_string();
1179                    let syntax_name = syntax.name.clone();
1180                    tracing::info!(
1181                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1182                        syntax_name,
1183                        scope
1184                    );
1185                    builder.add(syntax);
1186                    tracing::info!(
1187                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
1188                        spec.language,
1189                        spec.path,
1190                        spec.extensions
1191                    );
1192                    // Register extensions for this grammar
1193                    for ext in &spec.extensions {
1194                        user_extensions.insert(ext.clone(), scope.clone());
1195                    }
1196                    // Track provenance
1197                    grammar_sources.insert(
1198                        syntax_name.clone(),
1199                        GrammarInfo {
1200                            name: syntax_name,
1201                            source: GrammarSource::Plugin {
1202                                plugin: spec.language.clone(),
1203                                path: spec.path.clone(),
1204                            },
1205                            file_extensions: spec.extensions.clone(),
1206                            short_name: None,
1207                        },
1208                    );
1209                    // Track this grammar path for future reloads
1210                    loaded_grammar_paths.push(spec.clone());
1211                }
1212                Err(e) => {
1213                    tracing::warn!(
1214                        "Failed to load grammar for '{}' from {:?}: {}",
1215                        spec.language,
1216                        spec.path,
1217                        e
1218                    );
1219                }
1220            }
1221        }
1222
1223        Some(Self {
1224            syntax_set: Arc::new(builder.build()),
1225            user_extensions,
1226            filename_scopes: base.filename_scopes.clone(),
1227            loaded_grammar_paths,
1228            grammar_sources,
1229            aliases: base.aliases.clone(),
1230        })
1231    }
1232
1233    /// Load a grammar file from disk
1234    ///
1235    /// Only Sublime Text (.sublime-syntax) format is supported.
1236    /// TextMate (.tmLanguage) grammars use a completely different format
1237    /// and cannot be loaded by syntect's yaml-load feature.
1238    pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1239        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1240
1241        match ext {
1242            "sublime-syntax" => {
1243                let content = std::fs::read_to_string(path)
1244                    .map_err(|e| format!("Failed to read file: {}", e))?;
1245                SyntaxDefinition::load_from_str(
1246                    &content,
1247                    true,
1248                    path.file_stem().and_then(|s| s.to_str()),
1249                )
1250                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1251            }
1252            _ => Err(format!(
1253                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1254                ext
1255            )),
1256        }
1257    }
1258}
1259
1260impl Default for GrammarRegistry {
1261    fn default() -> Self {
1262        // Create with defaults and embedded grammars only (no user grammars)
1263        let defaults = SyntaxSet::load_defaults_newlines();
1264        let mut builder = defaults.into_builder();
1265        Self::add_embedded_grammars(&mut builder);
1266        let syntax_set = builder.build();
1267        let filename_scopes = Self::build_filename_scopes();
1268        let extra_extensions = Self::build_extra_extensions();
1269
1270        let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1271        registry.populate_built_in_aliases();
1272        registry
1273    }
1274}
1275
1276// VSCode package.json structures for parsing grammar manifests
1277
/// Top-level view of a package manifest (`package.json`) for grammar loading.
///
/// Only the `contributes` section is modelled here.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The manifest's `contributes` section; `None` when the manifest omits it.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1283
/// The `contributes` section of a package manifest.
///
/// Both lists default to empty when absent from the manifest.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations (id plus file extensions).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations (language, scope name, grammar file path).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1291
/// One entry of `contributes.languages` in a package manifest.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required in the manifest).
    pub id: String,
    /// Extensions declared for this language; empty when the manifest omits them.
    // NOTE(review): VSCode manifests usually spell these with a leading dot
    // (".ex") — confirm how callers normalise them.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1298
/// One entry of `contributes.grammars` in a package manifest.
///
/// All three fields are required for the entry to deserialize.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Identifier of the language this grammar belongs to.
    pub language: String,
    /// Scope name of the grammar; read from the manifest key `scopeName`.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file as written in the manifest.
    // NOTE(review): presumably relative to the manifest's directory — confirm against the loader.
    pub path: String,
}
1306
1307#[cfg(test)]
1308mod tests {
1309    use super::*;
1310
1311    #[test]
1312    fn test_empty_registry() {
1313        let registry = GrammarRegistry::empty();
1314        // Should have at least plain text
1315        assert!(!registry.available_syntaxes().is_empty());
1316    }
1317
1318    #[test]
1319    fn test_default_registry() {
1320        let registry = GrammarRegistry::default();
1321        // Should have built-in syntaxes
1322        assert!(!registry.available_syntaxes().is_empty());
1323    }
1324
1325    #[test]
1326    fn test_find_syntax_for_common_extensions() {
1327        let registry = GrammarRegistry::default();
1328
1329        // Test common extensions that syntect should support
1330        let test_cases = [
1331            ("test.py", true),
1332            ("test.rs", true),
1333            ("test.js", true),
1334            ("test.json", true),
1335            ("test.md", true),
1336            ("test.html", true),
1337            ("test.css", true),
1338            ("test.unknown_extension_xyz", false),
1339        ];
1340
1341        for (filename, should_exist) in test_cases {
1342            let path = Path::new(filename);
1343            let result = registry.find_syntax_for_file(path);
1344            assert_eq!(
1345                result.is_some(),
1346                should_exist,
1347                "Expected {:?} for {}",
1348                should_exist,
1349                filename
1350            );
1351        }
1352    }
1353
1354    #[test]
1355    fn test_syntax_set_arc() {
1356        let registry = GrammarRegistry::default();
1357        let arc1 = registry.syntax_set_arc();
1358        let arc2 = registry.syntax_set_arc();
1359        // Both should point to the same data
1360        assert!(Arc::ptr_eq(&arc1, &arc2));
1361    }
1362
1363    #[test]
1364    fn test_shell_dotfiles_detection() {
1365        let registry = GrammarRegistry::default();
1366
1367        // All these should be detected as shell scripts
1368        let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1369
1370        for filename in shell_files {
1371            let path = Path::new(filename);
1372            let result = registry.find_syntax_for_file(path);
1373            assert!(
1374                result.is_some(),
1375                "{} should be detected as a syntax",
1376                filename
1377            );
1378            let syntax = result.unwrap();
1379            // Should be detected as Bash/Shell
1380            assert!(
1381                syntax.name.to_lowercase().contains("bash")
1382                    || syntax.name.to_lowercase().contains("shell"),
1383                "{} should be detected as shell/bash, got: {}",
1384                filename,
1385                syntax.name
1386            );
1387        }
1388    }
1389
1390    #[test]
1391    fn test_pkgbuild_detection() {
1392        let registry = GrammarRegistry::default();
1393
1394        // PKGBUILD and APKBUILD should be detected as shell scripts
1395        for filename in ["PKGBUILD", "APKBUILD"] {
1396            let path = Path::new(filename);
1397            let result = registry.find_syntax_for_file(path);
1398            assert!(
1399                result.is_some(),
1400                "{} should be detected as a syntax",
1401                filename
1402            );
1403            let syntax = result.unwrap();
1404            // Should be detected as Bash/Shell
1405            assert!(
1406                syntax.name.to_lowercase().contains("bash")
1407                    || syntax.name.to_lowercase().contains("shell"),
1408                "{} should be detected as shell/bash, got: {}",
1409                filename,
1410                syntax.name
1411            );
1412        }
1413    }
1414
1415    #[test]
1416    fn test_find_syntax_with_glob_filenames() {
1417        let registry = GrammarRegistry::default();
1418        let mut languages = std::collections::HashMap::new();
1419        languages.insert(
1420            "shell-configs".to_string(),
1421            crate::config::LanguageConfig {
1422                extensions: vec!["sh".to_string()],
1423                filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1424                grammar: "bash".to_string(),
1425                comment_prefix: Some("#".to_string()),
1426                auto_indent: true,
1427                auto_close: None,
1428                auto_surround: None,
1429                textmate_grammar: None,
1430                show_whitespace_tabs: true,
1431                line_wrap: None,
1432                wrap_column: None,
1433                page_view: None,
1434                page_width: None,
1435                use_tabs: None,
1436                tab_size: None,
1437                formatter: None,
1438                format_on_save: false,
1439                on_save: vec![],
1440                word_characters: None,
1441            },
1442        );
1443
1444        // *.conf should match
1445        let result =
1446            registry.find_syntax_for_file_with_languages(Path::new("nftables.conf"), &languages);
1447        assert!(result.is_some(), "*.conf should match nftables.conf");
1448
1449        // *rc should match
1450        let result = registry.find_syntax_for_file_with_languages(Path::new("lfrc"), &languages);
1451        assert!(result.is_some(), "*rc should match lfrc");
1452
1453        // Unrelated file should not match via glob
1454        let result =
1455            registry.find_syntax_for_file_with_languages(Path::new("randomfile"), &languages);
1456        // May still match via built-in detection, but not via our config
1457        // Just verify it doesn't panic
1458        let _ = result;
1459    }
1460
1461    #[test]
1462    fn test_find_syntax_with_path_glob_filenames() {
1463        let registry = GrammarRegistry::default();
1464        let mut languages = std::collections::HashMap::new();
1465        languages.insert(
1466            "shell-configs".to_string(),
1467            crate::config::LanguageConfig {
1468                extensions: vec!["sh".to_string()],
1469                filenames: vec!["/etc/**/rc.*".to_string()],
1470                grammar: "bash".to_string(),
1471                comment_prefix: Some("#".to_string()),
1472                auto_indent: true,
1473                auto_close: None,
1474                auto_surround: None,
1475                textmate_grammar: None,
1476                show_whitespace_tabs: true,
1477                line_wrap: None,
1478                wrap_column: None,
1479                page_view: None,
1480                page_width: None,
1481                use_tabs: None,
1482                tab_size: None,
1483                formatter: None,
1484                format_on_save: false,
1485                on_save: vec![],
1486                word_characters: None,
1487            },
1488        );
1489
1490        // /etc/**/rc.* should match via full path
1491        let result =
1492            registry.find_syntax_for_file_with_languages(Path::new("/etc/rc.conf"), &languages);
1493        assert!(result.is_some(), "/etc/**/rc.* should match /etc/rc.conf");
1494
1495        let result = registry
1496            .find_syntax_for_file_with_languages(Path::new("/etc/init/rc.local"), &languages);
1497        assert!(
1498            result.is_some(),
1499            "/etc/**/rc.* should match /etc/init/rc.local"
1500        );
1501
1502        // Should NOT match a different root
1503        let result =
1504            registry.find_syntax_for_file_with_languages(Path::new("/var/rc.conf"), &languages);
1505        // /var/rc.conf won't match the path glob, but may match built-in detection
1506        // Just verify no panic
1507        let _ = result;
1508    }
1509
1510    #[test]
1511    fn test_exact_filename_takes_priority_over_glob() {
1512        let registry = GrammarRegistry::default();
1513        let mut languages = std::collections::HashMap::new();
1514
1515        // A language with exact filename "lfrc" -> python grammar
1516        languages.insert(
1517            "custom-lfrc".to_string(),
1518            crate::config::LanguageConfig {
1519                extensions: vec![],
1520                filenames: vec!["lfrc".to_string()],
1521                grammar: "python".to_string(),
1522                comment_prefix: Some("#".to_string()),
1523                auto_indent: true,
1524                auto_close: None,
1525                auto_surround: None,
1526                textmate_grammar: None,
1527                show_whitespace_tabs: true,
1528                line_wrap: None,
1529                wrap_column: None,
1530                page_view: None,
1531                page_width: None,
1532                use_tabs: None,
1533                tab_size: None,
1534                formatter: None,
1535                format_on_save: false,
1536                on_save: vec![],
1537                word_characters: None,
1538            },
1539        );
1540
1541        // A language with glob "*rc" -> bash grammar
1542        languages.insert(
1543            "rc-files".to_string(),
1544            crate::config::LanguageConfig {
1545                extensions: vec![],
1546                filenames: vec!["*rc".to_string()],
1547                grammar: "bash".to_string(),
1548                comment_prefix: Some("#".to_string()),
1549                auto_indent: true,
1550                auto_close: None,
1551                auto_surround: None,
1552                textmate_grammar: None,
1553                show_whitespace_tabs: true,
1554                line_wrap: None,
1555                wrap_column: None,
1556                page_view: None,
1557                page_width: None,
1558                use_tabs: None,
1559                tab_size: None,
1560                formatter: None,
1561                format_on_save: false,
1562                on_save: vec![],
1563                word_characters: None,
1564            },
1565        );
1566
1567        // "lfrc" should match the exact rule (python), not the glob (bash)
1568        let result = registry.find_syntax_for_file_with_languages(Path::new("lfrc"), &languages);
1569        assert!(result.is_some());
1570        let syntax = result.unwrap();
1571        assert!(
1572            syntax.name.to_lowercase().contains("python"),
1573            "exact match should win over glob, got: {}",
1574            syntax.name
1575        );
1576    }
1577
1578    #[test]
1579    fn test_built_in_aliases_resolve() {
1580        let registry = GrammarRegistry::default();
1581
1582        // "bash" should resolve to "Bourne Again Shell (bash)" via alias
1583        let syntax = registry.find_syntax_by_name("bash");
1584        assert!(syntax.is_some(), "alias 'bash' should resolve");
1585        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1586
1587        // "cpp" should resolve to "C++"
1588        let syntax = registry.find_syntax_by_name("cpp");
1589        assert!(syntax.is_some(), "alias 'cpp' should resolve");
1590        assert_eq!(syntax.unwrap().name, "C++");
1591
1592        // "csharp" should resolve to "C#"
1593        let syntax = registry.find_syntax_by_name("csharp");
1594        assert!(syntax.is_some(), "alias 'csharp' should resolve");
1595        assert_eq!(syntax.unwrap().name, "C#");
1596
1597        // "sh" should also resolve to bash
1598        let syntax = registry.find_syntax_by_name("sh");
1599        assert!(syntax.is_some(), "alias 'sh' should resolve");
1600        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1601
1602        // "proto" should resolve to "Protocol Buffers"
1603        let syntax = registry.find_syntax_by_name("proto");
1604        assert!(syntax.is_some(), "alias 'proto' should resolve");
1605        assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1606    }
1607
1608    #[test]
1609    fn test_alias_case_insensitive_input() {
1610        let registry = GrammarRegistry::default();
1611
1612        // Aliases should be case-insensitive on input
1613        let syntax = registry.find_syntax_by_name("BASH");
1614        assert!(
1615            syntax.is_some(),
1616            "alias 'BASH' should resolve case-insensitively"
1617        );
1618        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1619
1620        let syntax = registry.find_syntax_by_name("Cpp");
1621        assert!(
1622            syntax.is_some(),
1623            "alias 'Cpp' should resolve case-insensitively"
1624        );
1625        assert_eq!(syntax.unwrap().name, "C++");
1626    }
1627
1628    #[test]
1629    fn test_full_name_still_works() {
1630        let registry = GrammarRegistry::default();
1631
1632        // Full names should still work (exact match)
1633        let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1634        assert!(syntax.is_some(), "full name should still resolve");
1635        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1636
1637        // Case-insensitive full name should still work
1638        let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1639        assert!(
1640            syntax.is_some(),
1641            "case-insensitive full name should resolve"
1642        );
1643        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1644    }
1645
1646    #[test]
1647    fn test_alias_does_not_shadow_full_names() {
1648        let registry = GrammarRegistry::default();
1649
1650        // "Rust" should resolve directly via case-insensitive match, not via alias
1651        let syntax = registry.find_syntax_by_name("rust");
1652        assert!(syntax.is_some());
1653        assert_eq!(syntax.unwrap().name, "Rust");
1654
1655        // "Go" should resolve directly
1656        let syntax = registry.find_syntax_by_name("go");
1657        assert!(syntax.is_some());
1658        assert_eq!(syntax.unwrap().name, "Go");
1659    }
1660
1661    #[test]
1662    fn test_register_alias_rejects_collision() {
1663        let mut registry = GrammarRegistry::default();
1664
1665        // Trying to register an alias that maps to two different targets should fail
1666        assert!(registry.register_alias("myalias", "Rust"));
1667        assert!(!registry.register_alias("myalias", "Go"));
1668
1669        // Same mapping is fine (idempotent)
1670        assert!(registry.register_alias("myalias", "Rust"));
1671    }
1672
1673    #[test]
1674    fn test_register_alias_rejects_nonexistent_target() {
1675        let mut registry = GrammarRegistry::default();
1676        assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1677    }
1678
1679    #[test]
1680    fn test_register_alias_skips_existing_grammar_name() {
1681        let mut registry = GrammarRegistry::default();
1682
1683        // "rust" case-insensitively matches the grammar "Rust", so no alias needed
1684        assert!(!registry.register_alias("rust", "Rust"));
1685        // Should still be resolvable via case-insensitive match
1686        assert!(registry.find_syntax_by_name("rust").is_some());
1687    }
1688
1689    #[test]
1690    fn test_available_grammar_info_includes_short_names() {
1691        let registry = GrammarRegistry::default();
1692        let infos = registry.available_grammar_info();
1693
1694        let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1695        assert!(bash_info.is_some(), "bash grammar should be in the list");
1696        let bash_info = bash_info.unwrap();
1697        assert!(
1698            bash_info.short_name.is_some(),
1699            "bash grammar should have a short_name"
1700        );
1701        // The shortest alias for bash is "sh"
1702        assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1703    }
1704
1705    #[test]
1706    fn test_resolve_alias() {
1707        let registry = GrammarRegistry::default();
1708        assert_eq!(
1709            registry.resolve_alias("bash"),
1710            Some("Bourne Again Shell (bash)")
1711        );
1712        assert_eq!(registry.resolve_alias("nonexistent"), None);
1713    }
1714}