Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar to load: which language it is for, where the grammar
/// file lives, and which file extensions should resolve to it.
///
/// A named struct is used instead of an anonymous tuple so the plugin layer,
/// the loader, and the registry can exchange this information with
/// self-describing fields.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Identifier of the language this grammar highlights (e.g., "elixir").
    pub language: String,
    /// Location of the `.sublime-syntax` grammar file.
    pub path: PathBuf,
    /// Extensions that should map to this grammar (e.g., ["ex", "exs"]).
    pub extensions: Vec<String>,
}
30
31/// Where a grammar was loaded from.
32#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
33#[serde(tag = "type")]
34pub enum GrammarSource {
35    /// Built-in to Fresh (pre-compiled syntect defaults + embedded grammars)
36    #[serde(rename = "built-in")]
37    BuiltIn,
38    /// Installed from a user grammar directory (~/.config/fresh/grammars/)
39    #[serde(rename = "user")]
40    User { path: PathBuf },
41    /// From a language pack (~/.config/fresh/languages/packages/)
42    #[serde(rename = "language-pack")]
43    LanguagePack { name: String, path: PathBuf },
44    /// From a bundle package (~/.config/fresh/bundles/packages/)
45    #[serde(rename = "bundle")]
46    Bundle { name: String, path: PathBuf },
47    /// Registered by a plugin at runtime
48    #[serde(rename = "plugin")]
49    Plugin { plugin: String, path: PathBuf },
50}
51
52impl std::fmt::Display for GrammarSource {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            GrammarSource::BuiltIn => write!(f, "built-in"),
56            GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57            GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58            GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59            GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60        }
61    }
62}
63
64/// Information about an available grammar, including its provenance.
65#[derive(Clone, Debug, Serialize, Deserialize)]
66pub struct GrammarInfo {
67    /// The grammar name as used in config files (case-insensitive matching)
68    pub name: String,
69    /// Where this grammar was loaded from
70    pub source: GrammarSource,
71    /// File extensions associated with this grammar
72    pub file_extensions: Vec<String>,
73    /// Optional short name alias (e.g., "bash" for "Bourne Again Shell (bash)")
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub short_name: Option<String>,
76}
77
78/// Bridge between syntect display names and `fresh_languages::Language`.
79///
80/// Most syntect grammars map one-to-one: "Rust" → `Language::Rust`. A few
81/// have verbose display names that don't match the tree-sitter enum's
82/// `display_name()`, and `Language::from_name` has fuzzy "contains shell"
83/// fallbacks that would wrongly tag Nushell as tree-sitter Bash. This is
84/// the one place we spell the exceptions out explicitly.
85const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
86    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88/// Resolve a syntect syntax display name to a tree-sitter language, using
89/// strict equality against the alias table and `Language::display_name()`.
90fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91    for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92        if *syntect_name == display_name {
93            return Some(*lang);
94        }
95    }
96    fresh_languages::Language::all()
97        .iter()
98        .find(|l| l.display_name() == display_name)
99        .copied()
100}
101
102/// Which highlighters can serve a given `GrammarEntry`.
103///
104/// A catalog entry may come from syntect (a TextMate grammar indexed into
105/// `SyntaxSet`), tree-sitter (a `fresh_languages::Language`), or both.
106#[derive(Clone, Debug, Default)]
107pub struct GrammarEngines {
108    /// Index into `GrammarRegistry::syntax_set().syntaxes()`, if a syntect
109    /// grammar is available.
110    pub syntect: Option<usize>,
111    /// Tree-sitter language, if one is registered for this grammar.
112    pub tree_sitter: Option<fresh_languages::Language>,
113}
114
115/// A single entry in the unified grammar catalog.
116///
117/// Each entry represents one logical language (e.g. "Rust", "TypeScript") and
118/// records which highlighting engines can serve it, plus the names/extensions
119/// used to look it up. The catalog is the single source of truth for grammar
120/// lookups — `find_by_name`, `find_by_path`, `find_by_extension` all return
121/// entries from here, and both `HighlightEngine::from_entry` and
122/// `DetectedLanguage::from_entry` consume them.
123#[derive(Clone, Debug)]
124pub struct GrammarEntry {
125    /// Human-readable display name (e.g. "TypeScript", "Bourne Again Shell (bash)").
126    pub display_name: String,
127    /// Canonical language ID used in config and LSP (e.g. "typescript", "csharp").
128    pub language_id: String,
129    /// Short alias, if one exists (e.g. "ts" for TypeScript).
130    pub short_name: Option<String>,
131    /// File extensions (without leading dot).
132    pub extensions: Vec<String>,
133    /// Exact filenames that map to this grammar (e.g. "Dockerfile").
134    pub filenames: Vec<String>,
135    /// Filename globs from user config (e.g. "*.conf", "/etc/**/rc.*").
136    pub filename_globs: Vec<String>,
137    /// Where this grammar was loaded from.
138    pub source: GrammarSource,
139    /// Highlighters that can serve this entry.
140    pub engines: GrammarEngines,
141}
142
143/// Embedded TOML grammar (syntect doesn't include one)
144pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");
145
146/// Embedded Odin grammar (syntect doesn't include one)
147/// From: https://github.com/Tetralux/sublime-odin (MIT License)
148pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");
149
150/// Embedded Zig grammar (syntect doesn't include one)
151pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");
152
153/// Embedded Git Rebase Todo grammar for interactive rebase
154pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");
155
156/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
157pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");
158
159/// Embedded Gitignore grammar for .gitignore and similar files
160pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");
161
162/// Embedded Git Config grammar for .gitconfig, .gitmodules
163pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");
164
165/// Embedded Git Attributes grammar for .gitattributes
166pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");
167
168/// Embedded Typst grammar (syntect doesn't include one)
169pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");
170
171/// Embedded Dockerfile grammar
172pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
173/// Embedded INI grammar (also handles .env, .cfg, .editorconfig, etc.)
174pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
175/// Embedded CMake grammar
176pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
177/// Embedded SCSS grammar
178pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
179/// Embedded LESS grammar
180pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
181/// Embedded PowerShell grammar
182pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
183/// Embedded Kotlin grammar
184pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
185/// Embedded Swift grammar
186pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
187/// Embedded Dart grammar
188pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
189/// Embedded Elixir grammar
190pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
191/// Embedded F# grammar
192pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
193/// Embedded Nix grammar
194pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
195/// Embedded HCL/Terraform grammar
196pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
197/// Embedded Protocol Buffers grammar
198pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
199/// Embedded GraphQL grammar
200pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
201/// Embedded Julia grammar
202pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
203/// Embedded Nim grammar
204pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
205/// Embedded Gleam grammar
206pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
207/// Embedded V language grammar
208pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
209/// Embedded Solidity grammar
210pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
211/// Embedded KDL grammar
212pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
213/// Embedded Nushell grammar
214pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
215/// Embedded Starlark/Bazel grammar
216pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
217/// Embedded Justfile grammar
218pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
219/// Embedded Earthfile grammar
220pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
221/// Embedded Go Module grammar
222pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
223/// Embedded Vue grammar
224pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
225/// Embedded Svelte grammar
226pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
227/// Embedded Astro grammar
228pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
229/// Embedded Hyprlang grammar (Hyprland config)
230pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
231/// Embedded AutoHotkey grammar
232/// From: https://github.com/SALZKARTOFFEEEL/ahk-sublime-syntax (MIT License)
233pub const AUTOHOTKEY_GRAMMAR: &str =
234    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
235/// Embedded Racket grammar (syntect doesn't include one)
236pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
237/// Embedded Verilog grammar (HDL)
238pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
239/// Embedded SystemVerilog grammar (HDL)
240pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
241/// Embedded VHDL grammar (HDL)
242pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");
243
244pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");
245
246/// Registry of all available TextMate grammars.
247///
248/// This struct holds the compiled syntax set and provides lookup methods.
249/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
250impl std::fmt::Debug for GrammarRegistry {
251    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
252        f.debug_struct("GrammarRegistry")
253            .field("syntax_count", &self.syntax_set.syntaxes().len())
254            .finish()
255    }
256}
257
258pub struct GrammarRegistry {
259    /// Combined syntax set (built-in + embedded + user grammars)
260    syntax_set: Arc<SyntaxSet>,
261    /// Extension -> scope name mapping for user grammars (takes priority)
262    user_extensions: HashMap<String, String>,
263    /// Filename -> scope name mapping for dotfiles and special files
264    filename_scopes: HashMap<String, String>,
265    /// Paths to dynamically loaded grammar files (for reloading when adding more)
266    loaded_grammar_paths: Vec<GrammarSpec>,
267    /// Provenance info for each grammar (keyed by grammar name)
268    grammar_sources: HashMap<String, GrammarInfo>,
269    /// Short name aliases: lowercase short_name -> full syntect grammar name.
270    /// Provides a deterministic, one-to-one mapping so users can write
271    /// `grammar = "bash"` instead of `grammar = "Bourne Again Shell (bash)"`.
272    aliases: HashMap<String, String>,
273    /// Unified catalog of every known grammar. Rebuilt whenever the syntax set
274    /// or alias table changes. Lookups (`find_by_name`, `find_by_path`, ...)
275    /// all resolve against this.
276    catalog: Vec<GrammarEntry>,
277    /// Index from lowercased lookup keys (display name, language_id, short_name)
278    /// to catalog index.
279    catalog_by_name: HashMap<String, usize>,
280    /// Index from file extension (without dot) to catalog index.
281    catalog_by_extension: HashMap<String, usize>,
282    /// Index from filename to catalog index.
283    catalog_by_filename: HashMap<String, usize>,
284    /// The most recent language config handed to `apply_language_config`.
285    /// Retained so `rebuild_catalog` can replay it — otherwise a rebuild
286    /// (triggered by e.g. `populate_built_in_aliases`) silently wipes user
287    /// `[languages]` config that was merged on top.
288    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
289    /// Monotonic generation, bumped on every catalog mutation. Lets
290    /// observers (plugin state snapshot) detect changes with one integer
291    /// compare instead of recounting entries.
292    catalog_gen: u64,
293}
294
295impl GrammarRegistry {
296    /// Create a new GrammarRegistry from pre-built components.
297    ///
298    /// This is typically called by `GrammarLoader` implementations after
299    /// loading grammars from various sources.
300    pub(crate) fn new(
301        syntax_set: SyntaxSet,
302        user_extensions: HashMap<String, String>,
303        filename_scopes: HashMap<String, String>,
304    ) -> Self {
305        Self::new_with_loaded_paths(
306            syntax_set,
307            user_extensions,
308            filename_scopes,
309            Vec::new(),
310            HashMap::new(),
311        )
312    }
313
314    /// Create a GrammarRegistry with pre-loaded grammar path tracking.
315    ///
316    /// Used by the loader when plugin grammars were included in the initial build,
317    /// so that `loaded_grammar_paths()` reflects what was actually loaded.
318    pub(crate) fn new_with_loaded_paths(
319        syntax_set: SyntaxSet,
320        user_extensions: HashMap<String, String>,
321        filename_scopes: HashMap<String, String>,
322        loaded_grammar_paths: Vec<GrammarSpec>,
323        grammar_sources: HashMap<String, GrammarInfo>,
324    ) -> Self {
325        let mut reg = Self {
326            syntax_set: Arc::new(syntax_set),
327            user_extensions,
328            filename_scopes,
329            loaded_grammar_paths,
330            grammar_sources,
331            aliases: HashMap::new(),
332            catalog: Vec::new(),
333            catalog_by_name: HashMap::new(),
334            catalog_by_extension: HashMap::new(),
335            catalog_by_filename: HashMap::new(),
336            applied_language_config: HashMap::new(),
337            catalog_gen: 0,
338        };
339        reg.rebuild_catalog();
340        reg
341    }
342
343    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
344    pub fn empty() -> Arc<Self> {
345        let mut builder = SyntaxSetBuilder::new();
346        builder.add_plain_text_syntax();
347        let mut reg = Self {
348            syntax_set: Arc::new(builder.build()),
349            user_extensions: HashMap::new(),
350            filename_scopes: HashMap::new(),
351            loaded_grammar_paths: Vec::new(),
352            grammar_sources: HashMap::new(),
353            aliases: HashMap::new(),
354            catalog: Vec::new(),
355            catalog_by_name: HashMap::new(),
356            catalog_by_extension: HashMap::new(),
357            catalog_by_filename: HashMap::new(),
358            applied_language_config: HashMap::new(),
359            catalog_gen: 0,
360        };
361        reg.rebuild_catalog();
362        Arc::new(reg)
363    }
364
365    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
366    ///
367    /// This provides instant syntax highlighting for ~50 common languages
368    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
369    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
370    /// then swap in a full registry built on a background thread.
371    pub fn defaults_only() -> Arc<Self> {
372        // Load pre-compiled syntax set (defaults + embedded grammars) from
373        // build-time packdump. This avoids the expensive into_builder() + build()
374        // cycle at runtime (~12s → ~300ms).
375        tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
376        let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
377            concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
378        ))
379        .expect("Failed to load pre-compiled syntax packdump");
380        tracing::info!(
381            "defaults_only: loaded ({} syntaxes)",
382            syntax_set.syntaxes().len()
383        );
384        let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
385        let filename_scopes = Self::build_filename_scopes();
386        let extra_extensions = Self::build_extra_extensions();
387        let mut registry = Self {
388            syntax_set: Arc::new(syntax_set),
389            user_extensions: extra_extensions,
390            filename_scopes,
391            loaded_grammar_paths: Vec::new(),
392            grammar_sources,
393            aliases: HashMap::new(),
394            catalog: Vec::new(),
395            catalog_by_name: HashMap::new(),
396            catalog_by_extension: HashMap::new(),
397            catalog_by_filename: HashMap::new(),
398            applied_language_config: HashMap::new(),
399            catalog_gen: 0,
400        };
401        registry.populate_built_in_aliases();
402        registry.rebuild_catalog();
403        Arc::new(registry)
404    }
405
406    /// Build extra extension -> scope mappings for extensions not covered by syntect defaults.
407    ///
408    /// These map common file extensions to existing syntect grammar scopes,
409    /// filling gaps where syntect's built-in extension lists are incomplete.
410    pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
411        let mut map = HashMap::new();
412
413        // JavaScript variants not in syntect defaults (["js", "htc"])
414        let js_scope = "source.js".to_string();
415        map.insert("cjs".to_string(), js_scope.clone());
416        map.insert("mjs".to_string(), js_scope);
417
418        // Dockerfile variants (e.g. Dockerfile.dev -> .dev extension)
419        // These won't match by extension, handled by filename_scopes and first_line_match
420
421        map
422    }
423
424    /// Build the default filename -> scope mappings for dotfiles and special files.
425    pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
426        let mut map = HashMap::new();
427
428        // Shell configuration files -> Bash/Shell script scope
429        let shell_scope = "source.shell.bash".to_string();
430        for filename in [
431            ".zshrc",
432            ".zprofile",
433            ".zshenv",
434            ".zlogin",
435            ".zlogout",
436            ".bash_aliases",
437            // .bashrc and .bash_profile are already recognized by syntect
438            // Common shell script files without extensions
439            "PKGBUILD",
440            "APKBUILD",
441        ] {
442            map.insert(filename.to_string(), shell_scope.clone());
443        }
444
445        // Git rebase todo files
446        let git_rebase_scope = "source.git-rebase-todo".to_string();
447        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
448
449        // Git commit message files
450        let git_commit_scope = "source.git-commit".to_string();
451        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
452            map.insert(filename.to_string(), git_commit_scope.clone());
453        }
454
455        // Gitignore and similar files
456        let gitignore_scope = "source.gitignore".to_string();
457        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
458            map.insert(filename.to_string(), gitignore_scope.clone());
459        }
460
461        // Git config files
462        let gitconfig_scope = "source.gitconfig".to_string();
463        for filename in [".gitconfig", ".gitmodules"] {
464            map.insert(filename.to_string(), gitconfig_scope.clone());
465        }
466
467        // Git attributes files
468        let gitattributes_scope = "source.gitattributes".to_string();
469        map.insert(".gitattributes".to_string(), gitattributes_scope);
470
471        // Jenkinsfile -> Groovy
472        let groovy_scope = "source.groovy".to_string();
473        map.insert("Jenkinsfile".to_string(), groovy_scope);
474
475        // Vagrantfile -> Ruby (syntect already handles this, but be explicit)
476        // Brewfile -> Ruby
477        let ruby_scope = "source.ruby".to_string();
478        map.insert("Brewfile".to_string(), ruby_scope);
479
480        // Dockerfile and variants (exact names; Dockerfile.* handled via prefix check)
481        let dockerfile_scope = "source.dockerfile".to_string();
482        map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
483        map.insert("Containerfile".to_string(), dockerfile_scope.clone());
484        // Common Dockerfile variants
485        map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
486        map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
487        map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
488        map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
489
490        // CMake
491        let cmake_scope = "source.cmake".to_string();
492        map.insert("CMakeLists.txt".to_string(), cmake_scope);
493
494        // Starlark/Bazel
495        let starlark_scope = "source.starlark".to_string();
496        map.insert("BUILD".to_string(), starlark_scope.clone());
497        map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
498        map.insert("WORKSPACE".to_string(), starlark_scope.clone());
499        map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
500        map.insert("Tiltfile".to_string(), starlark_scope);
501
502        // Justfile (various casings)
503        let justfile_scope = "source.justfile".to_string();
504        map.insert("justfile".to_string(), justfile_scope.clone());
505        map.insert("Justfile".to_string(), justfile_scope.clone());
506        map.insert(".justfile".to_string(), justfile_scope);
507
508        // EditorConfig -> INI
509        let ini_scope = "source.ini".to_string();
510        map.insert(".editorconfig".to_string(), ini_scope);
511
512        // Earthfile
513        let earthfile_scope = "source.earthfile".to_string();
514        map.insert("Earthfile".to_string(), earthfile_scope);
515
516        // Hyprlang (Hyprland config files)
517        let hyprlang_scope = "source.hyprlang".to_string();
518        map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
519        map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
520        map.insert("hyprlock.conf".to_string(), hyprlang_scope);
521
522        // go.mod / go.sum
523        let gomod_scope = "source.gomod".to_string();
524        map.insert("go.mod".to_string(), gomod_scope.clone());
525        map.insert("go.sum".to_string(), gomod_scope);
526
527        map
528    }
529
530    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
531    pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
532        // TOML grammar
533        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
534            Ok(syntax) => {
535                builder.add(syntax);
536                tracing::debug!("Loaded embedded TOML grammar");
537            }
538            Err(e) => {
539                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
540            }
541        }
542
543        // Odin grammar
544        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
545            Ok(syntax) => {
546                builder.add(syntax);
547                tracing::debug!("Loaded embedded Odin grammar");
548            }
549            Err(e) => {
550                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
551            }
552        }
553
554        // Zig grammar
555        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
556            Ok(syntax) => {
557                builder.add(syntax);
558                tracing::debug!("Loaded embedded Zig grammar");
559            }
560            Err(e) => {
561                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
562            }
563        }
564
565        // Git Rebase Todo grammar
566        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
567            Ok(syntax) => {
568                builder.add(syntax);
569                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
570            }
571            Err(e) => {
572                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
573            }
574        }
575
576        // Git Commit Message grammar
577        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
578        {
579            Ok(syntax) => {
580                builder.add(syntax);
581                tracing::debug!("Loaded embedded Git Commit Message grammar");
582            }
583            Err(e) => {
584                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
585            }
586        }
587
588        // Gitignore grammar
589        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
590            Ok(syntax) => {
591                builder.add(syntax);
592                tracing::debug!("Loaded embedded Gitignore grammar");
593            }
594            Err(e) => {
595                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
596            }
597        }
598
599        // Git Config grammar
600        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
601            Ok(syntax) => {
602                builder.add(syntax);
603                tracing::debug!("Loaded embedded Git Config grammar");
604            }
605            Err(e) => {
606                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
607            }
608        }
609
610        // Git Attributes grammar
611        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
612            Ok(syntax) => {
613                builder.add(syntax);
614                tracing::debug!("Loaded embedded Git Attributes grammar");
615            }
616            Err(e) => {
617                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
618            }
619        }
620
621        // Typst grammar
622        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
623            Ok(syntax) => {
624                builder.add(syntax);
625                tracing::debug!("Loaded embedded Typst grammar");
626            }
627            Err(e) => {
628                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
629            }
630        }
631
632        // Additional embedded grammars for languages not in syntect defaults
633        let additional_grammars: &[(&str, &str)] = &[
634            (DOCKERFILE_GRAMMAR, "Dockerfile"),
635            (INI_GRAMMAR, "INI"),
636            (CMAKE_GRAMMAR, "CMake"),
637            (SCSS_GRAMMAR, "SCSS"),
638            (LESS_GRAMMAR, "LESS"),
639            (POWERSHELL_GRAMMAR, "PowerShell"),
640            (KOTLIN_GRAMMAR, "Kotlin"),
641            (SWIFT_GRAMMAR, "Swift"),
642            (DART_GRAMMAR, "Dart"),
643            (ELIXIR_GRAMMAR, "Elixir"),
644            (FSHARP_GRAMMAR, "FSharp"),
645            (NIX_GRAMMAR, "Nix"),
646            (HCL_GRAMMAR, "HCL"),
647            (PROTOBUF_GRAMMAR, "Protocol Buffers"),
648            (GRAPHQL_GRAMMAR, "GraphQL"),
649            (JULIA_GRAMMAR, "Julia"),
650            (NIM_GRAMMAR, "Nim"),
651            (GLEAM_GRAMMAR, "Gleam"),
652            (VLANG_GRAMMAR, "V"),
653            (SOLIDITY_GRAMMAR, "Solidity"),
654            (KDL_GRAMMAR, "KDL"),
655            (NUSHELL_GRAMMAR, "Nushell"),
656            (STARLARK_GRAMMAR, "Starlark"),
657            (JUSTFILE_GRAMMAR, "Justfile"),
658            (EARTHFILE_GRAMMAR, "Earthfile"),
659            (GOMOD_GRAMMAR, "Go Module"),
660            (VUE_GRAMMAR, "Vue"),
661            (SVELTE_GRAMMAR, "Svelte"),
662            (ASTRO_GRAMMAR, "Astro"),
663            (HYPRLANG_GRAMMAR, "Hyprlang"),
664            (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
665            (RACKET_GRAMMAR, "Racket"),
666            (VERILOG_GRAMMAR, "Verilog"),
667            (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
668            (VHDL_GRAMMAR, "VHDL"),
669            (C3_GRAMMAR, "C3"),
670        ];
671
672        for (grammar_str, name) in additional_grammars {
673            match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
674                Ok(syntax) => {
675                    builder.add(syntax);
676                    tracing::debug!("Loaded embedded {} grammar", name);
677                }
678                Err(e) => {
679                    tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
680                }
681            }
682        }
683    }
684
685    /// Find syntax for a file by path/extension/filename.
686    ///
687    /// Purely metadata-based — does not read the file. For first-line
688    /// (shebang) fallback, use [`find_by_path`] with a `first_line` argument
689    /// and resolve the returned entry's syntect index.
690    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
691        let entry = self.find_by_path(path, None)?;
692        entry
693            .engines
694            .syntect
695            .map(|i| &self.syntax_set.syntaxes()[i])
696    }
697
698    /// Find syntax by name, with alias resolution.
699    ///
700    /// Thin wrapper around `find_by_name` that returns the associated syntect
701    /// `SyntaxReference`. Tree-sitter-only entries return `None`.
702    ///
703    /// Falls back to a direct syntect lookup for "Plain Text", which the
704    /// catalog deliberately omits but syntect still exposes.
705    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
706        if let Some(entry) = self.find_by_name(name) {
707            if let Some(idx) = entry.engines.syntect {
708                return Some(&self.syntax_set.syntaxes()[idx]);
709            }
710        }
711        // Plain Text is excluded from the catalog (it's not a "grammar" a user
712        // would ever pick), but syntect still stores it and a handful of
713        // callers still ask for it by name.
714        self.syntax_set.find_syntax_by_name(name)
715    }
716
717    // === Alias management ===
718
719    /// Hardcoded short-name aliases for built-in and embedded grammars.
720    ///
721    /// Each entry maps a short name (lowercase) to the exact syntect grammar name.
722    /// Only grammars whose full name differs significantly from a natural short
723    /// form need an entry here. Grammars already short (e.g., "Rust", "Go") are
724    /// reachable via case-insensitive matching and don't need aliases.
725    fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
726        vec![
727            // Syntect built-in grammars with verbose names
728            ("bash", "Bourne Again Shell (bash)"),
729            ("shell", "Bourne Again Shell (bash)"),
730            ("sh", "Bourne Again Shell (bash)"),
731            ("c++", "C++"),
732            ("cpp", "C++"),
733            ("csharp", "C#"),
734            ("objc", "Objective-C"),
735            ("objcpp", "Objective-C++"),
736            ("regex", "Regular Expressions (Python)"),
737            ("regexp", "Regular Expressions (Python)"),
738            // Embedded grammars with multi-word or non-obvious names
739            ("proto", "Protocol Buffers"),
740            ("protobuf", "Protocol Buffers"),
741            ("gomod", "Go Module"),
742            ("git-rebase", "Git Rebase Todo"),
743            ("git-commit", "Git Commit Message"),
744            ("git-config", "Git Config"),
745            ("git-attributes", "Git Attributes"),
746            ("gitignore", "Gitignore"),
747            ("fsharp", "FSharp"),
748            ("f#", "FSharp"),
749            ("terraform", "HCL"),
750            ("tf", "HCL"),
751            ("ts", "TypeScript"),
752            ("js", "JavaScript"),
753            ("py", "Python"),
754            ("rb", "Ruby"),
755            ("rs", "Rust"),
756            ("md", "Markdown"),
757            ("yml", "YAML"),
758            ("dockerfile", "Dockerfile"),
759        ]
760    }
761
762    /// Populate aliases from the built-in table.
763    ///
764    /// Validates that:
765    /// - Each alias target (full name) exists in the syntax set
766    /// - No alias collides (case-insensitive) with an existing grammar full name
767    /// - No duplicate aliases exist
768    pub(crate) fn populate_built_in_aliases(&mut self) {
769        for (short, full) in Self::built_in_aliases() {
770            self.register_alias_inner(short, full, true);
771        }
772        self.rebuild_catalog();
773    }
774
775    /// Register a short-name alias for a grammar.
776    ///
777    /// Returns `true` if the alias was registered, `false` if rejected due to
778    /// collision or missing target. For built-in aliases, collisions panic
779    /// (they indicate a bug). For dynamic aliases, collisions log a warning.
780    ///
781    /// Splices the alias directly into the catalog rather than rebuilding, so
782    /// any user config previously merged via `apply_language_config` is
783    /// preserved. A full rebuild would wipe those entries.
784    pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
785        if !self.register_alias_inner(short_name, full_name, false) {
786            return false;
787        }
788        let short_lower = short_name.to_lowercase();
789        let full_lower = full_name.to_lowercase();
790        if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
791            self.catalog_by_name
792                .entry(short_lower.clone())
793                .or_insert(idx);
794            let entry = &mut self.catalog[idx];
795            let replace = match &entry.short_name {
796                None => true,
797                Some(existing) => short_name.len() < existing.len(),
798            };
799            if replace {
800                entry.short_name = Some(short_lower);
801            }
802        }
803        true
804    }
805
806    fn register_alias_inner(
807        &mut self,
808        short_name: &str,
809        full_name: &str,
810        is_built_in: bool,
811    ) -> bool {
812        let short_lower = short_name.to_lowercase();
813
814        // Validate: target grammar must exist in the syntax set
815        let target_exists = self
816            .syntax_set
817            .syntaxes()
818            .iter()
819            .any(|s| s.name.eq_ignore_ascii_case(full_name));
820        if !target_exists {
821            // Tree-sitter-only targets (e.g. TypeScript) are expected to be
822            // absent from the syntect set. `rebuild_catalog` attaches their
823            // short names via a separate pass over `built_in_aliases()`.
824            if tree_sitter_for_syntect_name(full_name).is_some() {
825                return false;
826            }
827            if is_built_in {
828                // Built-in alias targets should always exist; warn but don't panic
829                // (grammar might have been removed from syntect upstream)
830                tracing::warn!(
831                    "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
832                    short_name, full_name
833                );
834            } else {
835                tracing::warn!(
836                    "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
837                    short_name,
838                    full_name
839                );
840            }
841            return false;
842        }
843
844        // Validate: short name must not collide (case-insensitive) with any grammar full name
845        let collides_with_full_name = self
846            .syntax_set
847            .syntaxes()
848            .iter()
849            .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
850        if collides_with_full_name {
851            // This is actually fine — the short name matches a full name directly,
852            // so find_syntax_by_name's case-insensitive search will find it.
853            // No alias needed.
854            tracing::debug!(
855                "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
856                short_name
857            );
858            return false;
859        }
860
861        // Validate: no duplicate alias (case-insensitive)
862        if let Some(existing_target) = self.aliases.get(&short_lower) {
863            if existing_target.eq_ignore_ascii_case(full_name) {
864                // Same mapping, no-op
865                return true;
866            }
867            let msg = format!(
868                "Alias '{}' already maps to '{}', cannot remap to '{}'",
869                short_name, existing_target, full_name
870            );
871            if is_built_in {
872                panic!("[grammar-alias] Built-in alias collision: {}", msg);
873            } else {
874                tracing::warn!("[grammar-alias] {}", msg);
875                return false;
876            }
877        }
878
879        // Resolve the exact syntect name (preserving original case)
880        let exact_name = self
881            .syntax_set
882            .syntaxes()
883            .iter()
884            .find(|s| s.name.eq_ignore_ascii_case(full_name))
885            .map(|s| s.name.clone())
886            .unwrap();
887
888        self.aliases.insert(short_lower, exact_name);
889        true
890    }
891
892    // === Unified catalog ===
893
894    /// Rebuild the flat catalog of grammar entries.
895    ///
896    /// Called after the syntax set, aliases, or filename scopes change.
897    /// Produces one entry per logical language by merging:
898    /// 1. Every `SyntaxReference` in the syntax set (except "Plain Text")
899    /// 2. Every `fresh_languages::Language` not already covered by a syntect entry
900    /// 3. Alias short-names attached to their target entry
901    /// 4. Filename mappings from `filename_scopes` attached to their scope's entry
902    /// 5. Extra extensions from `user_extensions` attached to their scope's entry
903    ///
904    /// Automatically replays the last `apply_language_config` at the end, so
905    /// user `[languages]` config survives any rebuild.
906    pub(crate) fn rebuild_catalog(&mut self) {
907        // Reverse-map: full_name (lowercase) -> shortest alias.
908        //
909        // Seed from the built-in alias table as well as the live `aliases`
910        // HashMap: the live map only contains aliases whose target exists in
911        // the syntect set, so tree-sitter-only entries (TypeScript) would
912        // otherwise never get their short name ("ts").
913        let mut short_by_full: HashMap<String, String> = HashMap::new();
914        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
915            let key = full.to_lowercase();
916            let keep = match map.get(&key) {
917                None => true,
918                Some(existing) => short.len() < existing.len(),
919            };
920            if keep {
921                map.insert(key, short.to_string());
922            }
923        };
924        for (short, full) in Self::built_in_aliases() {
925            record(&mut short_by_full, short, full);
926        }
927        for (short, full) in &self.aliases {
928            record(&mut short_by_full, short, full);
929        }
930
931        let derive_language_id =
932            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
933                let ts = tree_sitter_for_syntect_name(display_name);
934                let id = ts
935                    .map(|l| l.id().to_string())
936                    .unwrap_or_else(|| display_name.to_lowercase());
937                (id, ts)
938            };
939
940        let mut catalog: Vec<GrammarEntry> = Vec::new();
941        let mut scope_to_index: HashMap<String, usize> = HashMap::new();
942
943        // Syntect-backed entries (skip Plain Text and JavaScript).
944        //
945        // Syntect's `file_extensions` is a hybrid list: real extensions like
946        // "rb" sit alongside bare filenames like "Gemfile", "Rakefile",
947        // "Makefile". Syntect's own `find_syntax_for_file` tries each entry
948        // against the whole filename AND against the path's extension, and
949        // the catalog has to preserve that semantics. We keep everything in
950        // `extensions` here and index each entry as *both* an extension and
951        // a filename at the bottom of this method.
952        //
953        // JavaScript is skipped here so the catalog falls through to the
954        // tree-sitter-only fallback below — the bundled syntect JS grammar
955        // mishandles class fields whose initialiser is an arrow function
956        // returning a template literal (issue #899: state leaks past the
957        // closing backtick and paints the rest of the file as a string).
958        // tree-sitter-javascript parses template literals from the AST and
959        // does not have this failure mode. `find_syntax_by_name("JavaScript")`
960        // still returns syntect's grammar via the catalog's fallback path,
961        // so markdown popup rendering and other code-string highlighters
962        // are unaffected.
963        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
964            if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
965                continue;
966            }
967            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
968            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
969            let source = self
970                .grammar_sources
971                .get(&syntax.name)
972                .map(|info| info.source.clone())
973                .unwrap_or(GrammarSource::BuiltIn);
974            let entry_index = catalog.len();
975            scope_to_index.insert(syntax.scope.to_string(), entry_index);
976
977            // Union syntect's file_extensions with tree-sitter's own
978            // extension list when the entry carries both engines.
979            // tree-sitter-javascript handles `.jsx`/`.mjs`/`.cjs` that
980            // syntect's JS grammar doesn't list, and the old code used to
981            // route those paths to tree-sitter via a separate lookup.
982            let mut extensions = syntax.file_extensions.clone();
983            if let Some(lang) = tree_sitter {
984                for ext in lang.extensions() {
985                    let ext = ext.to_string();
986                    if !extensions.iter().any(|e| e == &ext) {
987                        extensions.push(ext);
988                    }
989                }
990            }
991
992            catalog.push(GrammarEntry {
993                display_name: syntax.name.clone(),
994                language_id,
995                short_name,
996                extensions,
997                filenames: Vec::new(),
998                filename_globs: Vec::new(),
999                source,
1000                engines: GrammarEngines {
1001                    syntect: Some(idx),
1002                    tree_sitter,
1003                },
1004            });
1005        }
1006
1007        // Attach filename_scopes to their entries.
1008        for (filename, scope) in &self.filename_scopes {
1009            if let Some(&idx) = scope_to_index.get(scope) {
1010                if !catalog[idx].filenames.iter().any(|f| f == filename) {
1011                    catalog[idx].filenames.push(filename.clone());
1012                }
1013            }
1014        }
1015
1016        // Attach user_extensions (extra → scope) to their entries.
1017        for (ext, scope) in &self.user_extensions {
1018            if let Some(&idx) = scope_to_index.get(scope) {
1019                if !catalog[idx].extensions.iter().any(|e| e == ext) {
1020                    catalog[idx].extensions.push(ext.clone());
1021                }
1022            }
1023        }
1024
1025        // Ensure every tree-sitter language has an entry. If a syntect entry
1026        // already maps to the same tree-sitter language, skip it; otherwise
1027        // add a tree-sitter-only entry so the catalog is complete (TypeScript
1028        // being the motivating example — syntect ships no grammar for it).
1029        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1030            std::collections::HashSet::new();
1031        for entry in &catalog {
1032            if let Some(lang) = entry.engines.tree_sitter {
1033                ts_covered.insert(lang);
1034            }
1035        }
1036        for lang in fresh_languages::Language::all() {
1037            if ts_covered.contains(lang) {
1038                continue;
1039            }
1040            let display_name = lang.display_name().to_string();
1041            let language_id = lang.id().to_string();
1042            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1043            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1044            catalog.push(GrammarEntry {
1045                display_name,
1046                language_id,
1047                short_name,
1048                extensions,
1049                filenames: Vec::new(),
1050                filename_globs: Vec::new(),
1051                source: GrammarSource::BuiltIn,
1052                engines: GrammarEngines {
1053                    syntect: None,
1054                    tree_sitter: Some(*lang),
1055                },
1056            });
1057        }
1058
1059        // Build name / extension / filename indices.
1060        //
1061        // Every entry in `extensions` gets indexed in BOTH `by_extension`
1062        // (lowercased) AND `by_filename` (exact case) — syntect's
1063        // `file_extensions` list holds both real extensions ("rb") and bare
1064        // filenames ("Gemfile", "Rakefile", "Makefile"). Indexing both ways
1065        // matches syntect's own `find_syntax_for_file` semantics.
1066        let mut by_name: HashMap<String, usize> = HashMap::new();
1067        let mut by_extension: HashMap<String, usize> = HashMap::new();
1068        let mut by_filename: HashMap<String, usize> = HashMap::new();
1069        for (idx, entry) in catalog.iter().enumerate() {
1070            by_name.insert(entry.display_name.to_lowercase(), idx);
1071            by_name.insert(entry.language_id.to_lowercase(), idx);
1072            if let Some(short) = &entry.short_name {
1073                by_name.insert(short.to_lowercase(), idx);
1074            }
1075            for ext in &entry.extensions {
1076                by_extension.entry(ext.to_lowercase()).or_insert(idx);
1077                by_filename.entry(ext.clone()).or_insert(idx);
1078            }
1079            for filename in &entry.filenames {
1080                by_filename.entry(filename.clone()).or_insert(idx);
1081            }
1082        }
1083
1084        self.catalog = catalog;
1085        self.catalog_by_name = by_name;
1086        self.catalog_by_extension = by_extension;
1087        self.catalog_by_filename = by_filename;
1088
1089        // Replay the most recent user config so a rebuild doesn't silently
1090        // wipe out user `[languages]` rules. `take` + restore avoids both a
1091        // clone and a borrow checker fight with `apply_language_config_inner`.
1092        if !self.applied_language_config.is_empty() {
1093            let cfg = std::mem::take(&mut self.applied_language_config);
1094            self.apply_language_config_inner(&cfg);
1095            self.applied_language_config = cfg;
1096        }
1097        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1098    }
1099
1100    /// Return the full catalog of grammar entries.
1101    pub fn catalog(&self) -> &[GrammarEntry] {
1102        &self.catalog
1103    }
1104
1105    /// Monotonic generation, bumped on every catalog mutation. Compare against
1106    /// a previously-observed value to decide whether to recompute derived
1107    /// state.
1108    pub fn catalog_gen(&self) -> u64 {
1109        self.catalog_gen
1110    }
1111
1112    /// Look up a grammar entry by display name, language ID, or short alias
1113    /// (case-insensitive). All aliases — built-in and user-config-declared —
1114    /// are indexed directly in `catalog_by_name` during `rebuild_catalog` /
1115    /// `register_alias` / `apply_language_config`, so a single lookup covers
1116    /// every case.
1117    pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1118        self.catalog_by_name
1119            .get(&name.to_lowercase())
1120            .map(|&idx| &self.catalog[idx])
1121    }
1122
1123    /// Look up a grammar entry by file path, with optional first-line content
1124    /// for shebang / `first_line_match` detection.
1125    ///
1126    /// Resolution order:
1127    /// 1. Exact filename (config-declared filenames and filename_scopes live here)
1128    /// 2. Glob patterns from user config (e.g. "*.conf", "/etc/**/rc.*")
1129    /// 3. File extension
1130    /// 4. Shebang / first-line regex match on `first_line` if supplied
1131    ///
1132    /// Globs take priority over extension so a user rule like `*.conf → bash`
1133    /// wins over any built-in extension match on `.conf`. The first-line
1134    /// fallback (#4) is last so catalog matches stay authoritative — syntect
1135    /// might otherwise misclassify `.fish` as bash via its first-line
1136    /// regexes.
1137    ///
1138    /// The first-line fallback is pure: it runs syntect's
1139    /// `find_syntax_by_first_line` regex cache against the caller-supplied
1140    /// string. The registry never touches the filesystem — the caller (who
1141    /// already loaded the buffer via the `FileSystem` trait) must extract
1142    /// the first line and pass it in.
1143    pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1144        let filename = path.file_name().and_then(|n| n.to_str());
1145        let path_str = path.to_str().unwrap_or("");
1146
1147        if let Some(name) = filename {
1148            if let Some(&idx) = self.catalog_by_filename.get(name) {
1149                return Some(&self.catalog[idx]);
1150            }
1151        }
1152
1153        // Glob walk — filenames with globs are rare so linear scan is fine.
1154        if let Some(name) = filename {
1155            for entry in &self.catalog {
1156                for pattern in &entry.filename_globs {
1157                    let matched = if is_path_pattern(pattern) {
1158                        path_glob_matches(pattern, path_str)
1159                    } else {
1160                        filename_glob_matches(pattern, name)
1161                    };
1162                    if matched {
1163                        return Some(entry);
1164                    }
1165                }
1166            }
1167        }
1168
1169        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1170            if let Some(entry) = self.find_by_extension(ext) {
1171                return Some(entry);
1172            }
1173        }
1174
1175        // Last resort: shebang / first-line regex match against the
1176        // caller-supplied content. Map the matched syntect grammar back to a
1177        // catalog entry by name — every syntect syntax has a catalog entry,
1178        // so this round-trip preserves tree-sitter attachment.
1179        let line = first_line?;
1180        let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1181        self.find_by_name(&syntax.name)
1182    }
1183
1184    /// Look up a grammar entry by file extension (case-insensitive, without dot).
1185    pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1186        self.catalog_by_extension
1187            .get(&ext.to_lowercase())
1188            .map(|&idx| &self.catalog[idx])
1189    }
1190
1191    /// Merge user `[languages]` config into the catalog.
1192    ///
1193    /// For each config entry, resolves its grammar to an existing catalog entry
1194    /// (by grammar name or by language id). Extensions are added and override
1195    /// the ext→entry index so config wins over built-in mappings. Filenames are
1196    /// split into exact matches (indexed) and globs (walked at lookup time).
1197    ///
1198    /// If no existing entry matches, a new engine-less entry is created so the
1199    /// language still appears in the palette.
1200    ///
1201    /// Idempotent. The config is cached on the registry so `rebuild_catalog`
1202    /// can replay it — callers don't need to re-apply after a rebuild.
1203    pub fn apply_language_config(
1204        &mut self,
1205        languages: &HashMap<String, crate::config::LanguageConfig>,
1206    ) {
1207        self.applied_language_config = languages.clone();
1208        self.apply_language_config_inner(languages);
1209        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1210    }
1211
1212    /// Do the actual catalog splicing without touching
1213    /// `applied_language_config`. Called from `apply_language_config` (which
1214    /// records the input) and from `rebuild_catalog` (which replays the
1215    /// cached input after wiping the catalog).
1216    fn apply_language_config_inner(
1217        &mut self,
1218        languages: &HashMap<String, crate::config::LanguageConfig>,
1219    ) {
1220        for (lang_id, lang_cfg) in languages {
1221            let grammar_name = if lang_cfg.grammar.is_empty() {
1222                lang_id.as_str()
1223            } else {
1224                lang_cfg.grammar.as_str()
1225            };
1226
1227            // Resolve to an existing entry; fall back to creating one.
1228            let idx = self
1229                .catalog_by_name
1230                .get(&grammar_name.to_lowercase())
1231                .copied()
1232                .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1233                .unwrap_or_else(|| {
1234                    let idx = self.catalog.len();
1235                    self.catalog.push(GrammarEntry {
1236                        display_name: lang_id.clone(),
1237                        language_id: lang_id.clone(),
1238                        short_name: None,
1239                        extensions: Vec::new(),
1240                        filenames: Vec::new(),
1241                        filename_globs: Vec::new(),
1242                        source: GrammarSource::BuiltIn,
1243                        engines: GrammarEngines::default(),
1244                    });
1245                    idx
1246                });
1247
1248            // Always index the config key so `find_by_name("mylang")` resolves
1249            // even when `mylang` aliases an existing grammar (e.g.
1250            // `[languages.mylang] grammar = "Rust"`). `or_insert` preserves
1251            // any existing mapping — won't clobber the canonical entry.
1252            self.catalog_by_name
1253                .entry(lang_id.to_lowercase())
1254                .or_insert(idx);
1255
1256            for ext in &lang_cfg.extensions {
1257                if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1258                    self.catalog[idx].extensions.push(ext.clone());
1259                }
1260                // Config-declared extensions override any previous mapping.
1261                self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1262            }
1263            for filename in &lang_cfg.filenames {
1264                if is_glob_pattern(filename) {
1265                    if !self.catalog[idx]
1266                        .filename_globs
1267                        .iter()
1268                        .any(|f| f == filename)
1269                    {
1270                        self.catalog[idx].filename_globs.push(filename.clone());
1271                    }
1272                } else {
1273                    if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1274                        self.catalog[idx].filenames.push(filename.clone());
1275                    }
1276                    self.catalog_by_filename.insert(filename.clone(), idx);
1277                }
1278            }
1279        }
1280    }
1281
1282    /// Get the underlying syntax set
1283    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1284        &self.syntax_set
1285    }
1286
1287    /// Get a clone of the Arc for sharing
1288    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1289        Arc::clone(&self.syntax_set)
1290    }
1291
1292    /// List all available syntax names
1293    pub fn available_syntaxes(&self) -> Vec<&str> {
1294        self.syntax_set
1295            .syntaxes()
1296            .iter()
1297            .map(|s| s.name.as_str())
1298            .collect()
1299    }
1300
1301    /// List all available grammars with provenance information.
1302    ///
1303    /// Returns a sorted list of `GrammarInfo` entries derived from the unified
1304    /// catalog — this includes both syntect grammars and tree-sitter-only
1305    /// languages (like TypeScript). Each entry is listed exactly once even
1306    /// when both engines can serve it.
1307    pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1308        let mut result: Vec<GrammarInfo> = self
1309            .catalog
1310            .iter()
1311            .map(|entry| GrammarInfo {
1312                name: entry.display_name.clone(),
1313                source: entry.source.clone(),
1314                file_extensions: entry.extensions.clone(),
1315                short_name: entry.short_name.clone(),
1316            })
1317            .collect();
1318        result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1319        result
1320    }
1321
1322    /// Get the grammar sources map.
1323    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1324        &self.grammar_sources
1325    }
1326
1327    /// Build grammar source info from a pre-compiled syntax set.
1328    ///
1329    /// All grammars in the packdump (syntect defaults + embedded) are tagged as built-in.
1330    pub(crate) fn build_grammar_sources_from_syntax_set(
1331        syntax_set: &SyntaxSet,
1332    ) -> HashMap<String, GrammarInfo> {
1333        let mut sources = HashMap::new();
1334        for syntax in syntax_set.syntaxes() {
1335            sources.insert(
1336                syntax.name.clone(),
1337                GrammarInfo {
1338                    name: syntax.name.clone(),
1339                    source: GrammarSource::BuiltIn,
1340                    file_extensions: syntax.file_extensions.clone(),
1341                    short_name: None,
1342                },
1343            );
1344        }
1345        sources
1346    }
1347
1348    /// Get the user extensions mapping (extension -> scope name).
1349    #[cfg(test)]
1350    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1351        &self.user_extensions
1352    }
1353
1354    /// Get the loaded grammar paths (for deduplication in flush_pending_grammars).
1355    #[cfg(test)]
1356    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1357        &self.loaded_grammar_paths
1358    }
1359
1360    /// Create a new registry with additional grammar files
1361    ///
1362    /// This builds a new GrammarRegistry that includes all grammars from
1363    /// the base registry plus the additional grammars specified.
1364    /// Uses the base registry's syntax_set as the builder base, preserving
1365    /// all existing grammars (user grammars, language packs, etc.).
1366    ///
1367    /// # Arguments
1368    /// * `base` - The base registry to extend
1369    /// * `additional` - List of (language, path, extensions) tuples for new grammars
1370    ///
1371    /// # Returns
1372    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
1373    pub fn with_additional_grammars(
1374        base: &GrammarRegistry,
1375        additional: &[GrammarSpec],
1376    ) -> Option<Self> {
1377        tracing::info!(
1378            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1379            additional.len(),
1380            base.syntax_set.syntaxes().len()
1381        );
1382
1383        // Use the base registry's syntax_set as builder base — this preserves
1384        // ALL existing grammars (defaults, embedded, user, language packs)
1385        // without needing to reload them from disk.
1386        let mut builder = (*base.syntax_set).clone().into_builder();
1387
1388        // Preserve existing user extensions and add new ones
1389        let mut user_extensions = base.user_extensions.clone();
1390
1391        // Track loaded grammar paths (existing + new)
1392        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1393
1394        // Preserve existing grammar sources
1395        let mut grammar_sources = base.grammar_sources.clone();
1396
1397        // Add each new grammar
1398        for spec in additional {
1399            tracing::info!(
1400                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1401                spec.language,
1402                spec.path,
1403                spec.extensions
1404            );
1405            match Self::load_grammar_file(&spec.path) {
1406                Ok(syntax) => {
1407                    let scope = syntax.scope.to_string();
1408                    let syntax_name = syntax.name.clone();
1409                    tracing::info!(
1410                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1411                        syntax_name,
1412                        scope
1413                    );
1414                    builder.add(syntax);
1415                    tracing::info!(
1416                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
1417                        spec.language,
1418                        spec.path,
1419                        spec.extensions
1420                    );
1421                    // Register extensions for this grammar
1422                    for ext in &spec.extensions {
1423                        user_extensions.insert(ext.clone(), scope.clone());
1424                    }
1425                    // Track provenance
1426                    grammar_sources.insert(
1427                        syntax_name.clone(),
1428                        GrammarInfo {
1429                            name: syntax_name,
1430                            source: GrammarSource::Plugin {
1431                                plugin: spec.language.clone(),
1432                                path: spec.path.clone(),
1433                            },
1434                            file_extensions: spec.extensions.clone(),
1435                            short_name: None,
1436                        },
1437                    );
1438                    // Track this grammar path for future reloads
1439                    loaded_grammar_paths.push(spec.clone());
1440                }
1441                Err(e) => {
1442                    tracing::warn!(
1443                        "Failed to load grammar for '{}' from {:?}: {}",
1444                        spec.language,
1445                        spec.path,
1446                        e
1447                    );
1448                }
1449            }
1450        }
1451
1452        let mut reg = Self {
1453            syntax_set: Arc::new(builder.build()),
1454            user_extensions,
1455            filename_scopes: base.filename_scopes.clone(),
1456            loaded_grammar_paths,
1457            grammar_sources,
1458            aliases: base.aliases.clone(),
1459            catalog: Vec::new(),
1460            catalog_by_name: HashMap::new(),
1461            catalog_by_extension: HashMap::new(),
1462            catalog_by_filename: HashMap::new(),
1463            applied_language_config: HashMap::new(),
1464            catalog_gen: 0,
1465        };
1466        reg.rebuild_catalog();
1467        Some(reg)
1468    }
1469
1470    /// Load a grammar file from disk
1471    ///
1472    /// Only Sublime Text (.sublime-syntax) format is supported.
1473    /// TextMate (.tmLanguage) grammars use a completely different format
1474    /// and cannot be loaded by syntect's yaml-load feature.
1475    pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1476        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1477
1478        match ext {
1479            "sublime-syntax" => {
1480                let content = std::fs::read_to_string(path)
1481                    .map_err(|e| format!("Failed to read file: {}", e))?;
1482                SyntaxDefinition::load_from_str(
1483                    &content,
1484                    true,
1485                    path.file_stem().and_then(|s| s.to_str()),
1486                )
1487                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1488            }
1489            _ => Err(format!(
1490                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1491                ext
1492            )),
1493        }
1494    }
1495}
1496
1497impl Default for GrammarRegistry {
1498    fn default() -> Self {
1499        // Create with defaults and embedded grammars only (no user grammars)
1500        let defaults = SyntaxSet::load_defaults_newlines();
1501        let mut builder = defaults.into_builder();
1502        Self::add_embedded_grammars(&mut builder);
1503        let syntax_set = builder.build();
1504        let filename_scopes = Self::build_filename_scopes();
1505        let extra_extensions = Self::build_extra_extensions();
1506
1507        let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1508        registry.populate_built_in_aliases();
1509        registry.rebuild_catalog();
1510        registry
1511    }
1512}
1513
1514// VSCode package.json structures for parsing grammar manifests
1515
1516#[derive(Debug, Deserialize)]
1517pub struct PackageManifest {
1518    #[serde(default)]
1519    pub contributes: Option<Contributes>,
1520}
1521
1522#[derive(Debug, Deserialize, Default)]
1523pub struct Contributes {
1524    #[serde(default)]
1525    pub languages: Vec<LanguageContribution>,
1526    #[serde(default)]
1527    pub grammars: Vec<GrammarContribution>,
1528}
1529
1530#[derive(Debug, Deserialize)]
1531pub struct LanguageContribution {
1532    pub id: String,
1533    #[serde(default)]
1534    pub extensions: Vec<String>,
1535}
1536
1537#[derive(Debug, Deserialize)]
1538pub struct GrammarContribution {
1539    pub language: String,
1540    #[serde(rename = "scopeName")]
1541    pub scope_name: String,
1542    pub path: String,
1543}
1544
1545#[cfg(test)]
1546mod tests {
1547    use super::*;
1548
/// An empty registry must still list at least one syntax.
#[test]
fn test_empty_registry() {
    let registry = GrammarRegistry::empty();
    // Should have at least plain text
    let syntaxes = registry.available_syntaxes();
    assert!(
        !syntaxes.is_empty(),
        "empty registry should still list at least one syntax"
    );
}
1555
/// The default registry must list its built-in syntaxes.
#[test]
fn test_default_registry() {
    let registry = GrammarRegistry::default();
    // Should have built-in syntaxes
    let syntaxes = registry.available_syntaxes();
    assert!(
        !syntaxes.is_empty(),
        "default registry should list built-in syntaxes"
    );
}
1562
/// Checks which common file extensions resolve to a syntect grammar
/// through `find_syntax_for_file`.
#[test]
fn test_find_syntax_for_common_extensions() {
    let registry = GrammarRegistry::default();

    // Each pair is (file name, whether a syntect grammar is expected).
    //
    // JavaScript is intentionally expected to be absent — it is routed
    // exclusively to tree-sitter (issue #899) and so has no catalog-level
    // syntect entry. Code-block highlighting in popups still finds the
    // syntect JS grammar through `SyntaxSet::find_syntax_by_token`, which
    // bypasses the catalog.
    let expectations = [
        ("test.py", true),
        ("test.rs", true),
        ("test.js", false),
        ("test.json", true),
        ("test.md", true),
        ("test.html", true),
        ("test.css", true),
        ("test.unknown_extension_xyz", false),
    ];

    for &(filename, should_exist) in expectations.iter() {
        let found = registry
            .find_syntax_for_file(Path::new(filename))
            .is_some();
        assert_eq!(
            found, should_exist,
            "Expected {:?} for {}",
            should_exist, filename
        );
    }
}
1596
/// Every Racket-family extension must resolve to a syntax and to the
/// catalog entry displayed as "Racket".
#[test]
fn test_racket_grammar_loaded() {
    let registry = GrammarRegistry::default();
    for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
        let path = Path::new(filename);

        assert!(
            registry.find_syntax_for_file(path).is_some(),
            "Racket grammar should be available for {}",
            filename
        );

        let entry = registry.find_by_path(path, None).unwrap();
        assert_eq!(entry.display_name, "Racket", "for {}", filename);
    }
}
1611
/// Two calls to `syntax_set_arc` must hand out the same allocation.
#[test]
fn test_syntax_set_arc() {
    let registry = GrammarRegistry::default();
    let first = registry.syntax_set_arc();
    let second = registry.syntax_set_arc();
    // Both should point to the same data
    assert!(
        Arc::ptr_eq(&first, &second),
        "syntax_set_arc should return pointers to the same data"
    );
}
1620
/// Shell dotfiles must resolve to a syntax whose name mentions bash or
/// shell.
#[test]
fn test_shell_dotfiles_detection() {
    let registry = GrammarRegistry::default();

    // All these should be detected as shell scripts
    for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));

        // Should be detected as Bash/Shell
        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
1647
/// `PKGBUILD` and `APKBUILD` must resolve to a syntax whose name mentions
/// bash or shell.
#[test]
fn test_pkgbuild_detection() {
    let registry = GrammarRegistry::default();

    // PKGBUILD and APKBUILD should be detected as shell scripts
    let build_files = ["PKGBUILD", "APKBUILD"];
    for filename in build_files {
        let syntax = match registry.find_syntax_for_file(Path::new(filename)) {
            Some(found) => found,
            None => panic!("{} should be detected as a syntax", filename),
        };

        // Should be detected as Bash/Shell
        let name = syntax.name.to_lowercase();
        let looks_like_shell = name.contains("bash") || name.contains("shell");
        assert!(
            looks_like_shell,
            "{} should be detected as shell/bash, got: {}",
            filename, syntax.name
        );
    }
}
1672
/// Filename globs declared in language config (`*.conf`, `*rc`) must be
/// honoured by `find_by_path`.
#[test]
fn test_find_syntax_with_glob_filenames() {
    let mut registry = GrammarRegistry::default();
    let mut languages = std::collections::HashMap::new();
    // Reuse the module's `lang_cfg` helper instead of spelling out every
    // field; only `comment_prefix` differs from the helper's defaults.
    languages.insert(
        "shell-configs".to_string(),
        crate::config::LanguageConfig {
            comment_prefix: Some("#".to_string()),
            ..lang_cfg("bash", &["sh"], &["*.conf", "*rc"])
        },
    );
    registry.apply_language_config(&languages);

    assert!(
        registry
            .find_by_path(Path::new("nftables.conf"), None)
            .is_some(),
        "*.conf should match nftables.conf"
    );
    assert!(
        registry.find_by_path(Path::new("lfrc"), None).is_some(),
        "*rc should match lfrc"
    );
    // Unrelated file shouldn't panic.
    let _ = registry.find_by_path(Path::new("randomfile"), None);
}
1716
/// Path-style globs declared in language config (`/etc/**/rc.*`) must be
/// honoured by `find_by_path`.
#[test]
fn test_find_syntax_with_path_glob_filenames() {
    let mut registry = GrammarRegistry::default();
    let mut languages = std::collections::HashMap::new();
    // Reuse the module's `lang_cfg` helper instead of spelling out every
    // field; only `comment_prefix` differs from the helper's defaults.
    languages.insert(
        "shell-configs".to_string(),
        crate::config::LanguageConfig {
            comment_prefix: Some("#".to_string()),
            ..lang_cfg("bash", &["sh"], &["/etc/**/rc.*"])
        },
    );
    registry.apply_language_config(&languages);

    assert!(
        registry
            .find_by_path(Path::new("/etc/rc.conf"), None)
            .is_some(),
        "/etc/**/rc.* should match /etc/rc.conf"
    );
    assert!(
        registry
            .find_by_path(Path::new("/etc/init/rc.local"), None)
            .is_some(),
        "/etc/**/rc.* should match /etc/init/rc.local"
    );
    // A path outside /etc is only exercised for absence of panics.
    let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
}
1761
/// When one language lists a filename exactly and another matches it with a
/// glob, the exact rule must win.
#[test]
fn test_exact_filename_takes_priority_over_glob() {
    let mut registry = GrammarRegistry::default();
    let mut languages = std::collections::HashMap::new();

    // Both configs reuse the module's `lang_cfg` helper; only
    // `comment_prefix` differs from the helper's defaults.

    // A language with exact filename "lfrc" -> python grammar
    languages.insert(
        "custom-lfrc".to_string(),
        crate::config::LanguageConfig {
            comment_prefix: Some("#".to_string()),
            ..lang_cfg("python", &[], &["lfrc"])
        },
    );

    // A language with glob "*rc" -> bash grammar
    languages.insert(
        "rc-files".to_string(),
        crate::config::LanguageConfig {
            comment_prefix: Some("#".to_string()),
            ..lang_cfg("bash", &[], &["*rc"])
        },
    );

    registry.apply_language_config(&languages);

    // "lfrc" should match the exact rule (python), not the glob (bash)
    let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
    assert!(
        entry.display_name.to_lowercase().contains("python"),
        "exact match should win over glob, got: {}",
        entry.display_name
    );
}
1829
/// Built-in short aliases must resolve to their full grammar names.
#[test]
fn test_built_in_aliases_resolve() {
    let registry = GrammarRegistry::default();

    // (alias, grammar name it must resolve to)
    let cases = [
        ("bash", "Bourne Again Shell (bash)"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        // "sh" should also resolve to bash
        ("sh", "Bourne Again Shell (bash)"),
        ("proto", "Protocol Buffers"),
    ];

    for (alias, expected) in cases {
        let syntax = registry
            .find_syntax_by_name(alias)
            .unwrap_or_else(|| panic!("alias '{}' should resolve", alias));
        assert_eq!(syntax.name, expected);
    }
}
1859
/// Alias lookup must ignore the case of the name that was typed.
#[test]
fn test_alias_case_insensitive_input() {
    let registry = GrammarRegistry::default();

    // Aliases should be case-insensitive on input
    for (typed, expected) in [("BASH", "Bourne Again Shell (bash)"), ("Cpp", "C++")] {
        let syntax = registry.find_syntax_by_name(typed).unwrap_or_else(|| {
            panic!("alias '{}' should resolve case-insensitively", typed)
        });
        assert_eq!(syntax.name, expected);
    }
}
1879
/// Full grammar names must keep resolving, both exactly and
/// case-insensitively.
#[test]
fn test_full_name_still_works() {
    let registry = GrammarRegistry::default();

    // (name as typed, message if it fails to resolve)
    let cases = [
        // Full names should still work (exact match)
        (
            "Bourne Again Shell (bash)",
            "full name should still resolve",
        ),
        // Case-insensitive full name should still work
        (
            "bourne again shell (bash)",
            "case-insensitive full name should resolve",
        ),
    ];

    for (typed, failure) in cases {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(syntax.is_some(), "{}", failure);
        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
    }
}
1897
/// Lower-cased grammar names must resolve to the grammar itself.
#[test]
fn test_alias_does_not_shadow_full_names() {
    let registry = GrammarRegistry::default();

    // "Rust" and "Go" should resolve directly via case-insensitive match,
    // not via alias.
    for (typed, canonical) in [("rust", "Rust"), ("go", "Go")] {
        let found = registry.find_syntax_by_name(typed);
        assert!(found.is_some());
        assert_eq!(found.unwrap().name, canonical);
    }
}
1912
/// An alias may only ever point at one grammar; re-registering the same
/// mapping is accepted.
#[test]
fn test_register_alias_rejects_collision() {
    let mut registry = GrammarRegistry::default();

    // Trying to register an alias that maps to two different targets should fail
    let first = registry.register_alias("myalias", "Rust");
    assert!(first, "first registration of 'myalias' should succeed");
    let conflicting = registry.register_alias("myalias", "Go");
    assert!(!conflicting, "remapping 'myalias' to Go should be rejected");

    // Same mapping is fine (idempotent)
    let repeated = registry.register_alias("myalias", "Rust");
    assert!(repeated, "repeating the same mapping should succeed");
}
1924
/// Registering an alias for a grammar that does not exist must fail.
#[test]
fn test_register_alias_rejects_nonexistent_target() {
    let mut registry = GrammarRegistry::default();
    let accepted = registry.register_alias("nope", "Nonexistent Grammar");
    assert!(!accepted, "alias to a missing grammar should be rejected");
}
1930
/// An alias that only differs in case from a grammar name is not
/// registered, yet the name must keep resolving.
#[test]
fn test_register_alias_skips_existing_grammar_name() {
    let mut registry = GrammarRegistry::default();

    // "rust" case-insensitively matches the grammar "Rust", so no alias needed
    let registered = registry.register_alias("rust", "Rust");
    assert!(!registered, "'rust' should not be registered as an alias");

    // Should still be resolvable via case-insensitive match
    let resolved = registry.find_syntax_by_name("rust");
    assert!(resolved.is_some(), "'rust' should still resolve");
}
1940
/// The grammar info list must carry a short name for bash, and that short
/// name must be "sh".
#[test]
fn test_available_grammar_info_includes_short_names() {
    let registry = GrammarRegistry::default();
    let infos = registry.available_grammar_info();

    let bash = infos
        .iter()
        .find(|info| info.name == "Bourne Again Shell (bash)")
        .expect("bash grammar should be in the list");

    assert!(
        bash.short_name.is_some(),
        "bash grammar should have a short_name"
    );
    // The shortest alias for bash is "sh"
    assert_eq!(bash.short_name.as_deref(), Some("sh"));
}
1956
/// The catalog must list every language exactly once and route each one to
/// the expected highlighting engine(s).
#[test]
fn test_catalog_contains_each_language_once() {
    let registry = GrammarRegistry::default();

    // Every catalog entry must have a unique (case-insensitive) display name.
    let mut seen_names = std::collections::HashSet::new();
    for entry in registry.catalog() {
        let first_time = seen_names.insert(entry.display_name.to_lowercase());
        assert!(
            first_time,
            "duplicate catalog entry for display_name={:?}",
            entry.display_name
        );
    }

    // TypeScript is tree-sitter-only (syntect ships no grammar for it) yet
    // must still appear in the catalog.
    let typescript = registry
        .find_by_name("TypeScript")
        .expect("TypeScript must be in the catalog");
    assert!(typescript.engines.syntect.is_none());
    assert_eq!(
        typescript.engines.tree_sitter,
        Some(fresh_languages::Language::TypeScript)
    );
    assert_eq!(typescript.language_id, "typescript");
    assert!(typescript.extensions.iter().any(|ext| ext == "ts"));

    // Languages that exist in both syntect and tree-sitter (Rust, Python)
    // must appear exactly once and prefer the syntect engine.
    for name in ["Rust", "Python"] {
        let by_name = registry
            .find_by_name(name)
            .unwrap_or_else(|| panic!("{} must be in the catalog", name));
        assert!(
            by_name.engines.syntect.is_some(),
            "{} should have a syntect index",
            name
        );
        assert!(
            by_name.engines.tree_sitter.is_some(),
            "{} should also have a tree-sitter language",
            name
        );
        // Uniqueness of the display name was checked above; additionally
        // verify that a language_id lookup lands on the same entry.
        let by_id = registry
            .find_by_name(&by_name.language_id)
            .expect("language_id should resolve");
        assert_eq!(by_id.display_name, by_name.display_name);
    }

    // JavaScript is deliberately routed to tree-sitter only — the
    // bundled syntect JavaScript grammar mishandles certain template
    // literals and bleeds string state into the rest of the file
    // (issue #899). The catalog must therefore expose a tree-sitter-only
    // entry, even though syntect ships a JavaScript grammar.
    let javascript = registry
        .find_by_name("JavaScript")
        .expect("JavaScript must be in the catalog");
    assert!(
        javascript.engines.syntect.is_none(),
        "JavaScript must not be routed to the syntect engine (issue #899)"
    );
    assert_eq!(
        javascript.engines.tree_sitter,
        Some(fresh_languages::Language::JavaScript),
        "JavaScript must carry the tree-sitter language"
    );
}
2028
/// Catalog lookups by path and by bare extension must both resolve.
#[test]
fn test_catalog_find_by_path_and_extension() {
    let registry = GrammarRegistry::default();

    let by_path = registry
        .find_by_path(Path::new("foo.ts"), None)
        .expect("foo.ts should resolve");
    assert_eq!(by_path.display_name, "TypeScript");

    let by_extension = registry.find_by_extension("rs").expect("rs should resolve");
    assert_eq!(by_extension.display_name, "Rust");
}
2039
2040    /// Build a minimal LanguageConfig for tests.
2041    fn lang_cfg(
2042        grammar: &str,
2043        extensions: &[&str],
2044        filenames: &[&str],
2045    ) -> crate::config::LanguageConfig {
2046        crate::config::LanguageConfig {
2047            extensions: extensions.iter().map(|s| s.to_string()).collect(),
2048            filenames: filenames.iter().map(|s| s.to_string()).collect(),
2049            grammar: grammar.to_string(),
2050            comment_prefix: None,
2051            auto_indent: true,
2052            auto_close: None,
2053            auto_surround: None,
2054            textmate_grammar: None,
2055            show_whitespace_tabs: true,
2056            line_wrap: None,
2057            wrap_column: None,
2058            page_view: None,
2059            page_width: None,
2060            use_tabs: None,
2061            tab_size: None,
2062            formatter: None,
2063            format_on_save: false,
2064            on_save: vec![],
2065            word_characters: None,
2066        }
2067    }
2068
/// Bug #1: when user config declares a key that points at an existing
/// grammar (e.g. `[languages.mylang] grammar = "Rust"`), that key must
/// resolve through `find_by_name("mylang")` so the language palette can
/// select it.
#[test]
fn test_user_alias_resolves_via_find_by_name() {
    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "mylang".to_string(),
        lang_cfg("Rust", &[], &[]),
    )]);
    registry.apply_language_config(&languages);

    let resolved = registry
        .find_by_name("mylang")
        .expect("user-declared alias 'mylang' must resolve");
    assert_eq!(resolved.display_name, "Rust");
}
2084
/// Bug #2: `register_alias` used to rebuild the catalog from scratch,
/// which discarded everything `apply_language_config` had merged. An alias
/// registered afterwards must leave the applied user config intact.
#[test]
fn test_register_alias_preserves_applied_language_config() {
    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "shell-configs".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    )]);
    registry.apply_language_config(&languages);

    let probe = Path::new("foo.myconf");

    // Sanity: config applied.
    assert!(registry.find_by_extension("myconf").is_some());
    assert!(
        registry.find_by_path(probe, None).is_some(),
        "glob should match before register_alias"
    );

    // Registering an alias must not erase the config we just applied.
    registry.register_alias("mycustom", "Rust");

    assert!(
        registry.find_by_extension("myconf").is_some(),
        "config extension must survive register_alias"
    );
    assert!(
        registry.find_by_path(probe, None).is_some(),
        "glob must survive register_alias"
    );
}
2121
/// Bug #4: `from_syntax_name` used to overwrite the catalog's canonical
/// display name with whatever the user typed (e.g. "BASH"), and that string
/// ended up in the status bar. The canonical name must be kept.
#[test]
fn test_from_syntax_name_preserves_canonical_display_name() {
    use crate::primitives::detected_language::DetectedLanguage;

    let registry = GrammarRegistry::default();
    // No user language config is needed for this check.
    let no_config = std::collections::HashMap::new();

    let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &no_config)
        .expect("BASH should resolve via alias");

    assert_eq!(
        detected.display_name, "Bourne Again Shell (bash)",
        "display_name must be canonical, not user-typed"
    );
}
2138
/// A language that exists only in user config (no matching syntect grammar)
/// must still show up in the catalog so the language palette can offer it —
/// the old `DetectedLanguage::from_config_language` branch was load-bearing.
#[test]
fn test_config_only_language_appears_in_catalog() {
    let mut registry = GrammarRegistry::default();
    // "fish" isn't in syntect; grammar="fish" doesn't resolve either.
    let languages = std::collections::HashMap::from([(
        "fish".to_string(),
        lang_cfg("fish", &["fish"], &[]),
    )]);
    registry.apply_language_config(&languages);

    let fish = registry
        .find_by_name("fish")
        .expect("fish should be in the catalog after apply_language_config");

    // No highlighting engine backs the entry, but its identity is intact.
    assert!(fish.engines.syntect.is_none());
    assert!(fish.engines.tree_sitter.is_none());
    assert_eq!(fish.language_id, "fish");
    assert!(fish.extensions.iter().any(|ext| ext == "fish"));
}
2158
/// Extensions declared in user config must take precedence over the
/// built-in mapping: with `[languages.ts-overlay] extensions = ["js"]
/// grammar = "TypeScript"`, the `js` extension must resolve to TypeScript
/// instead of JavaScript.
#[test]
fn test_config_extension_overrides_builtin() {
    let mut registry = GrammarRegistry::default();

    // Sanity: default mapping is JavaScript.
    let before = registry.find_by_extension("js").unwrap();
    assert_eq!(before.display_name, "JavaScript");

    let languages = std::collections::HashMap::from([(
        "ts-overlay".to_string(),
        lang_cfg("TypeScript", &["js"], &[]),
    )]);
    registry.apply_language_config(&languages);

    let after = registry.find_by_extension("js").unwrap();
    assert_eq!(
        after.display_name, "TypeScript",
        "user-config extension must win over built-in"
    );
}
2185
2186    /// Bare filenames listed by syntect grammars (e.g. "Gemfile", "Makefile",
2187    /// "Rakefile") must resolve through `find_by_path`. Syntect stores these
2188    /// in each grammar's `file_extensions` field alongside real extensions
2189    /// like "rb"; its own `find_syntax_for_file` treats them as either. The
2190    /// catalog has to do the same or `HighlightEngine::for_file` breaks for
2191    /// every extensionless config file.
2192    #[test]
2193    fn test_bare_filename_resolves_via_find_by_path() {
2194        let registry = GrammarRegistry::default();
2195        for (filename, expected_substr) in [
2196            ("Gemfile", "ruby"),
2197            ("Rakefile", "ruby"),
2198            ("Vagrantfile", "ruby"),
2199            ("Makefile", "makefile"),
2200            ("GNUmakefile", "makefile"),
2201        ] {
2202            let entry = registry
2203                .find_by_path(Path::new(filename), None)
2204                .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2205            assert!(
2206                entry.display_name.to_lowercase().contains(expected_substr),
2207                "{} should resolve to {} grammar, got {}",
2208                filename,
2209                expected_substr,
2210                entry.display_name
2211            );
2212        }
2213    }
2214
2215    /// Languages that have both syntect and tree-sitter (e.g. JavaScript) must
2216    /// expose the union of both engines' extensions. Tree-sitter-javascript
2217    /// knows `.jsx`; syntect's JavaScript grammar does not. Both should route
2218    /// through the JavaScript catalog entry.
2219    #[test]
2220    fn test_jsx_resolves_to_javascript() {
2221        let registry = GrammarRegistry::default();
2222        let entry = registry
2223            .find_by_path(Path::new("foo.jsx"), None)
2224            .expect("foo.jsx must resolve");
2225        assert_eq!(entry.display_name, "JavaScript");
2226    }
2227
2228    /// `rebuild_catalog` must replay the last-applied language config so it
2229    /// can never silently wipe user `[languages]` rules. This is the invariant
2230    /// that keeps `register_alias`, `populate_built_in_aliases`, and any
2231    /// future rebuild callsite safe-by-construction.
2232    #[test]
2233    fn test_rebuild_catalog_replays_language_config() {
2234        let mut registry = GrammarRegistry::default();
2235        let mut languages = std::collections::HashMap::new();
2236        languages.insert(
2237            "myshell".to_string(),
2238            lang_cfg("bash", &["myext"], &["*.myglob"]),
2239        );
2240        registry.apply_language_config(&languages);
2241        assert!(registry.find_by_extension("myext").is_some());
2242        assert!(registry
2243            .find_by_path(Path::new("foo.myglob"), None)
2244            .is_some());
2245
2246        // Force a rebuild — the catalog gets wiped and re-populated from
2247        // syntect / tree-sitter, but user config must come back on top.
2248        registry.rebuild_catalog();
2249        assert!(
2250            registry.find_by_extension("myext").is_some(),
2251            "rebuild_catalog must replay applied user config"
2252        );
2253        assert!(
2254            registry
2255                .find_by_path(Path::new("foo.myglob"), None)
2256                .is_some(),
2257            "rebuild_catalog must replay user globs"
2258        );
2259    }
2260
2261    /// `apply_language_config` must be idempotent: calling it twice with the
2262    /// same config yields the same catalog state.
2263    #[test]
2264    fn test_apply_language_config_idempotent() {
2265        let mut registry = GrammarRegistry::default();
2266        let mut languages = std::collections::HashMap::new();
2267        languages.insert(
2268            "shell-cfg".to_string(),
2269            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2270        );
2271
2272        registry.apply_language_config(&languages);
2273        let first_extensions = registry
2274            .find_by_name("bash")
2275            .unwrap()
2276            .extensions
2277            .iter()
2278            .filter(|e| e == &"myconf")
2279            .count();
2280        let first_globs = registry
2281            .find_by_name("bash")
2282            .unwrap()
2283            .filename_globs
2284            .iter()
2285            .filter(|g| g == &"*.myconf")
2286            .count();
2287        assert_eq!(first_extensions, 1);
2288        assert_eq!(first_globs, 1);
2289
2290        // Second call must not duplicate anything.
2291        registry.apply_language_config(&languages);
2292        let second_extensions = registry
2293            .find_by_name("bash")
2294            .unwrap()
2295            .extensions
2296            .iter()
2297            .filter(|e| e == &"myconf")
2298            .count();
2299        let second_globs = registry
2300            .find_by_name("bash")
2301            .unwrap()
2302            .filename_globs
2303            .iter()
2304            .filter(|g| g == &"*.myconf")
2305            .count();
2306        assert_eq!(second_extensions, 1, "extensions must not duplicate");
2307        assert_eq!(second_globs, 1, "globs must not duplicate");
2308    }
2309
2310    /// Julia: a single-quote after an identifier is the adjoint
2311    /// (conjugate-transpose) postfix operator, not the start of a string. The
2312    /// old grammar pushed a string context on every `'`, so `A'` swallowed
2313    /// the rest of the file until the next quote — wrecking highlighting for
2314    /// any subsequent keyword. Issue #1852.
2315    #[test]
2316    fn test_julia_adjoint_does_not_start_string() {
2317        use syntect::parsing::{ParseState, ScopeStack};
2318
2319        let registry = GrammarRegistry::default();
2320        let syntax_set = registry.syntax_set();
2321        let syntax = registry
2322            .find_syntax_by_name("Julia")
2323            .expect("Julia grammar must be loaded");
2324        let mut state = ParseState::new(syntax);
2325        let mut scopes = ScopeStack::new();
2326
2327        // Adjoint operator followed by code on later lines.
2328        let lines = ["x = A'\n", "function foo()\n", "end\n"];
2329        let mut keyword_line_in_string = false;
2330        let mut found_function_keyword = false;
2331
2332        for line in &lines {
2333            let ops = state.parse_line(line, syntax_set).unwrap();
2334            // Walk byte-by-byte, applying ops as we pass their offset.
2335            let mut op_iter = ops.iter().peekable();
2336            for (byte_idx, _) in line.char_indices() {
2337                while let Some((offset, op)) = op_iter.peek() {
2338                    if *offset <= byte_idx {
2339                        scopes.apply(op).unwrap();
2340                        op_iter.next();
2341                    } else {
2342                        break;
2343                    }
2344                }
2345                let in_string = scopes
2346                    .as_slice()
2347                    .iter()
2348                    .any(|s| s.build_string().starts_with("string."));
2349                let is_function_kw = line[byte_idx..].starts_with("function");
2350                if is_function_kw && in_string {
2351                    keyword_line_in_string = true;
2352                }
2353                if is_function_kw && !in_string {
2354                    found_function_keyword = true;
2355                }
2356            }
2357            // Drain remaining ops at end of line.
2358            for (_, op) in op_iter {
2359                scopes.apply(op).unwrap();
2360            }
2361        }
2362
2363        assert!(
2364            !keyword_line_in_string,
2365            "the `function` keyword after an adjoint operator must not be inside a string scope"
2366        );
2367        assert!(
2368            found_function_keyword,
2369            "test harness must have reached the `function` keyword"
2370        );
2371    }
2372
2373    /// Julia: `'a'` is a valid character literal. The grammar must still
2374    /// scope it as a constant/character so themes can color it. Issue #1852.
2375    #[test]
2376    fn test_julia_char_literal_is_recognized() {
2377        use syntect::parsing::{ParseState, ScopeStack};
2378
2379        let registry = GrammarRegistry::default();
2380        let syntax_set = registry.syntax_set();
2381        let syntax = registry
2382            .find_syntax_by_name("Julia")
2383            .expect("Julia grammar must be loaded");
2384        let mut state = ParseState::new(syntax);
2385        let mut scopes = ScopeStack::new();
2386
2387        let line = "x = 'a'\n";
2388        let ops = state.parse_line(line, syntax_set).unwrap();
2389        let mut saw_constant_or_string_at_quote = false;
2390        let mut op_iter = ops.iter().peekable();
2391        for (byte_idx, _) in line.char_indices() {
2392            while let Some((offset, op)) = op_iter.peek() {
2393                if *offset <= byte_idx {
2394                    scopes.apply(op).unwrap();
2395                    op_iter.next();
2396                } else {
2397                    break;
2398                }
2399            }
2400            if byte_idx == 5 {
2401                // position of 'a' (the char)
2402                let scoped = scopes.as_slice().iter().any(|s| {
2403                    let str = s.build_string();
2404                    str.starts_with("constant.") || str.starts_with("string.")
2405                });
2406                if scoped {
2407                    saw_constant_or_string_at_quote = true;
2408                }
2409            }
2410        }
2411        assert!(
2412            saw_constant_or_string_at_quote,
2413            "char literal 'a' must receive a constant/string scope"
2414        );
2415    }
2416
2417    /// `tree_sitter_for_syntect_name` handles the alias table + strict
2418    /// display-name match. The alias table catches syntect's verbose names;
2419    /// the strict match handles the common case.
2420    #[test]
2421    fn test_tree_sitter_bridge() {
2422        assert_eq!(
2423            tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2424            Some(fresh_languages::Language::Bash)
2425        );
2426        assert_eq!(
2427            tree_sitter_for_syntect_name("Rust"),
2428            Some(fresh_languages::Language::Rust)
2429        );
2430        // Must NOT fuzzy-match Nushell to Bash.
2431        assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2432        // Must NOT match arbitrary strings.
2433        assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2434    }
2435}