Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// A grammar specification: language name, path to grammar file, and associated file extensions.
///
/// Used to pass grammar information between the plugin layer, loader, and registry
/// without relying on anonymous tuples. This is a plain data holder: building one
/// performs no I/O, it only records where the grammar file is expected to live.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language identifier (e.g., "elixir")
    pub language: String,
    /// Path to the grammar file (.sublime-syntax)
    pub path: PathBuf,
    /// File extensions to associate with this grammar (e.g., ["ex", "exs"]).
    /// NOTE(review): the examples carry no leading dot — confirm callers follow that.
    pub extensions: Vec<String>,
}
30
/// Where a grammar was loaded from.
///
/// Serialized with serde's internally tagged representation: the variant is
/// stored under a `"type"` key using the kebab-case names given by the
/// `rename` attributes below (`"built-in"`, `"user"`, `"language-pack"`,
/// `"bundle"`, `"plugin"`), next to the variant's own fields.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Built-in to Fresh (pre-compiled syntect defaults + embedded grammars)
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Installed from a user grammar directory (~/.config/fresh/grammars/)
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// From a language pack (~/.config/fresh/languages/packages/)
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// From a bundle package (~/.config/fresh/bundles/packages/)
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Registered by a plugin at runtime
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            GrammarSource::BuiltIn => write!(f, "built-in"),
56            GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57            GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58            GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59            GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60        }
61    }
62}
63
/// Information about an available grammar, including its provenance.
///
/// Serializable so it can cross process / plugin boundaries; `short_name` is
/// left out of the serialized form when it is `None` and falls back to `None`
/// when the field is missing on input.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// The grammar name as used in config files (case-insensitive matching)
    pub name: String,
    /// Where this grammar was loaded from
    pub source: GrammarSource,
    /// File extensions associated with this grammar
    pub file_extensions: Vec<String>,
    /// Optional short name alias (e.g., "bash" for "Bourne Again Shell (bash)")
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Bridge between syntect display names and `fresh_languages::Language`.
///
/// Most syntect grammars map one-to-one: "Rust" → `Language::Rust`. A few
/// have verbose display names that don't match the tree-sitter enum's
/// `display_name()`, and `Language::from_name` has fuzzy "contains shell"
/// fallbacks that would wrongly tag Nushell as tree-sitter Bash. This is
/// the one place we spell the exceptions out explicitly.
///
/// Each element is `(exact syntect display name, tree-sitter language)`.
/// The table is consulted before the `display_name()` comparison, and names
/// are compared with plain `==` (case-sensitive, no trimming).
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88/// Resolve a syntect syntax display name to a tree-sitter language, using
89/// strict equality against the alias table and `Language::display_name()`.
90fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91    for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92        if *syntect_name == display_name {
93            return Some(*lang);
94        }
95    }
96    fresh_languages::Language::all()
97        .iter()
98        .find(|l| l.display_name() == display_name)
99        .copied()
100}
101
/// Which highlighters can serve a given `GrammarEntry`.
///
/// A catalog entry may come from syntect (a TextMate grammar indexed into
/// `SyntaxSet`), tree-sitter (a `fresh_languages::Language`), or both.
/// The derived `Default` leaves both engines unset (`None`).
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index into `GrammarRegistry::syntax_set().syntaxes()`, if a syntect
    /// grammar is available. The index is positional, so it is only
    /// meaningful for the syntax set it was computed against.
    pub syntect: Option<usize>,
    /// Tree-sitter language, if one is registered for this grammar.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// A single entry in the unified grammar catalog.
///
/// Each entry represents one logical language (e.g. "Rust", "TypeScript") and
/// records which highlighting engines can serve it, plus the names/extensions
/// used to look it up. The catalog is the single source of truth for grammar
/// lookups — `find_by_name`, `find_by_path`, `find_by_extension` all return
/// entries from here, and both `HighlightEngine::from_entry` and
/// `DetectedLanguage::from_entry` consume them.
///
/// Entries are plain owned data (`Clone`), so a caller can keep a copy; a copy
/// is a snapshot and does not follow later catalog changes.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Human-readable display name (e.g. "TypeScript", "Bourne Again Shell (bash)").
    pub display_name: String,
    /// Canonical language ID used in config and LSP (e.g. "typescript", "csharp").
    pub language_id: String,
    /// Short alias, if one exists (e.g. "ts" for TypeScript).
    pub short_name: Option<String>,
    /// File extensions (without leading dot).
    pub extensions: Vec<String>,
    /// Exact filenames that map to this grammar (e.g. "Dockerfile").
    pub filenames: Vec<String>,
    /// Filename globs from user config (e.g. "*.conf", "/etc/**/rc.*").
    pub filename_globs: Vec<String>,
    /// Where this grammar was loaded from.
    pub source: GrammarSource,
    /// Highlighters that can serve this entry.
    pub engines: GrammarEngines,
}
142
// === Embedded grammar sources ===
//
// Each constant below holds the full text of a `.sublime-syntax` file,
// baked into the binary at compile time by `include_str!`. The paths are
// resolved relative to this source file, so a missing or renamed grammar
// file is a build error rather than a runtime failure. The constants are
// parsed by `GrammarRegistry::add_embedded_grammars`.

/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar (syntect doesn't include one)
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Embedded Dockerfile grammar
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar (also handles .env, .cfg, .editorconfig, etc.)
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL/Terraform grammar
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V language grammar
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Starlark/Bazel grammar
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go Module grammar
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar (Hyprland config)
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar
/// From: https://github.com/SALZKARTOFFEEEL/ahk-sublime-syntax (MIT License)
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
/// Embedded Racket grammar (syntect doesn't include one)
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
/// Embedded Verilog grammar (HDL)
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
/// Embedded SystemVerilog grammar (HDL)
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
/// Embedded VHDL grammar (HDL)
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");
243
244/// Registry of all available TextMate grammars.
245///
246/// This struct holds the compiled syntax set and provides lookup methods.
247/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
248impl std::fmt::Debug for GrammarRegistry {
249    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
250        f.debug_struct("GrammarRegistry")
251            .field("syntax_count", &self.syntax_set.syntaxes().len())
252            .finish()
253    }
254}
255
256pub struct GrammarRegistry {
257    /// Combined syntax set (built-in + embedded + user grammars)
258    syntax_set: Arc<SyntaxSet>,
259    /// Extension -> scope name mapping for user grammars (takes priority)
260    user_extensions: HashMap<String, String>,
261    /// Filename -> scope name mapping for dotfiles and special files
262    filename_scopes: HashMap<String, String>,
263    /// Paths to dynamically loaded grammar files (for reloading when adding more)
264    loaded_grammar_paths: Vec<GrammarSpec>,
265    /// Provenance info for each grammar (keyed by grammar name)
266    grammar_sources: HashMap<String, GrammarInfo>,
267    /// Short name aliases: lowercase short_name -> full syntect grammar name.
268    /// Provides a deterministic, one-to-one mapping so users can write
269    /// `grammar = "bash"` instead of `grammar = "Bourne Again Shell (bash)"`.
270    aliases: HashMap<String, String>,
271    /// Unified catalog of every known grammar. Rebuilt whenever the syntax set
272    /// or alias table changes. Lookups (`find_by_name`, `find_by_path`, ...)
273    /// all resolve against this.
274    catalog: Vec<GrammarEntry>,
275    /// Index from lowercased lookup keys (display name, language_id, short_name)
276    /// to catalog index.
277    catalog_by_name: HashMap<String, usize>,
278    /// Index from file extension (without dot) to catalog index.
279    catalog_by_extension: HashMap<String, usize>,
280    /// Index from filename to catalog index.
281    catalog_by_filename: HashMap<String, usize>,
282    /// The most recent language config handed to `apply_language_config`.
283    /// Retained so `rebuild_catalog` can replay it — otherwise a rebuild
284    /// (triggered by e.g. `populate_built_in_aliases`) silently wipes user
285    /// `[languages]` config that was merged on top.
286    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
287    /// Monotonic generation, bumped on every catalog mutation. Lets
288    /// observers (plugin state snapshot) detect changes with one integer
289    /// compare instead of recounting entries.
290    catalog_gen: u64,
291}
292
293impl GrammarRegistry {
294    /// Create a new GrammarRegistry from pre-built components.
295    ///
296    /// This is typically called by `GrammarLoader` implementations after
297    /// loading grammars from various sources.
298    pub(crate) fn new(
299        syntax_set: SyntaxSet,
300        user_extensions: HashMap<String, String>,
301        filename_scopes: HashMap<String, String>,
302    ) -> Self {
303        Self::new_with_loaded_paths(
304            syntax_set,
305            user_extensions,
306            filename_scopes,
307            Vec::new(),
308            HashMap::new(),
309        )
310    }
311
312    /// Create a GrammarRegistry with pre-loaded grammar path tracking.
313    ///
314    /// Used by the loader when plugin grammars were included in the initial build,
315    /// so that `loaded_grammar_paths()` reflects what was actually loaded.
316    pub(crate) fn new_with_loaded_paths(
317        syntax_set: SyntaxSet,
318        user_extensions: HashMap<String, String>,
319        filename_scopes: HashMap<String, String>,
320        loaded_grammar_paths: Vec<GrammarSpec>,
321        grammar_sources: HashMap<String, GrammarInfo>,
322    ) -> Self {
323        let mut reg = Self {
324            syntax_set: Arc::new(syntax_set),
325            user_extensions,
326            filename_scopes,
327            loaded_grammar_paths,
328            grammar_sources,
329            aliases: HashMap::new(),
330            catalog: Vec::new(),
331            catalog_by_name: HashMap::new(),
332            catalog_by_extension: HashMap::new(),
333            catalog_by_filename: HashMap::new(),
334            applied_language_config: HashMap::new(),
335            catalog_gen: 0,
336        };
337        reg.rebuild_catalog();
338        reg
339    }
340
341    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
342    pub fn empty() -> Arc<Self> {
343        let mut builder = SyntaxSetBuilder::new();
344        builder.add_plain_text_syntax();
345        let mut reg = Self {
346            syntax_set: Arc::new(builder.build()),
347            user_extensions: HashMap::new(),
348            filename_scopes: HashMap::new(),
349            loaded_grammar_paths: Vec::new(),
350            grammar_sources: HashMap::new(),
351            aliases: HashMap::new(),
352            catalog: Vec::new(),
353            catalog_by_name: HashMap::new(),
354            catalog_by_extension: HashMap::new(),
355            catalog_by_filename: HashMap::new(),
356            applied_language_config: HashMap::new(),
357            catalog_gen: 0,
358        };
359        reg.rebuild_catalog();
360        Arc::new(reg)
361    }
362
363    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
364    ///
365    /// This provides instant syntax highlighting for ~50 common languages
366    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
367    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
368    /// then swap in a full registry built on a background thread.
369    pub fn defaults_only() -> Arc<Self> {
370        // Load pre-compiled syntax set (defaults + embedded grammars) from
371        // build-time packdump. This avoids the expensive into_builder() + build()
372        // cycle at runtime (~12s → ~300ms).
373        tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
374        let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
375            concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
376        ))
377        .expect("Failed to load pre-compiled syntax packdump");
378        tracing::info!(
379            "defaults_only: loaded ({} syntaxes)",
380            syntax_set.syntaxes().len()
381        );
382        let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
383        let filename_scopes = Self::build_filename_scopes();
384        let extra_extensions = Self::build_extra_extensions();
385        let mut registry = Self {
386            syntax_set: Arc::new(syntax_set),
387            user_extensions: extra_extensions,
388            filename_scopes,
389            loaded_grammar_paths: Vec::new(),
390            grammar_sources,
391            aliases: HashMap::new(),
392            catalog: Vec::new(),
393            catalog_by_name: HashMap::new(),
394            catalog_by_extension: HashMap::new(),
395            catalog_by_filename: HashMap::new(),
396            applied_language_config: HashMap::new(),
397            catalog_gen: 0,
398        };
399        registry.populate_built_in_aliases();
400        registry.rebuild_catalog();
401        Arc::new(registry)
402    }
403
404    /// Build extra extension -> scope mappings for extensions not covered by syntect defaults.
405    ///
406    /// These map common file extensions to existing syntect grammar scopes,
407    /// filling gaps where syntect's built-in extension lists are incomplete.
408    pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
409        let mut map = HashMap::new();
410
411        // JavaScript variants not in syntect defaults (["js", "htc"])
412        let js_scope = "source.js".to_string();
413        map.insert("cjs".to_string(), js_scope.clone());
414        map.insert("mjs".to_string(), js_scope);
415
416        // Dockerfile variants (e.g. Dockerfile.dev -> .dev extension)
417        // These won't match by extension, handled by filename_scopes and first_line_match
418
419        map
420    }
421
422    /// Build the default filename -> scope mappings for dotfiles and special files.
423    pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
424        let mut map = HashMap::new();
425
426        // Shell configuration files -> Bash/Shell script scope
427        let shell_scope = "source.shell.bash".to_string();
428        for filename in [
429            ".zshrc",
430            ".zprofile",
431            ".zshenv",
432            ".zlogin",
433            ".zlogout",
434            ".bash_aliases",
435            // .bashrc and .bash_profile are already recognized by syntect
436            // Common shell script files without extensions
437            "PKGBUILD",
438            "APKBUILD",
439        ] {
440            map.insert(filename.to_string(), shell_scope.clone());
441        }
442
443        // Git rebase todo files
444        let git_rebase_scope = "source.git-rebase-todo".to_string();
445        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
446
447        // Git commit message files
448        let git_commit_scope = "source.git-commit".to_string();
449        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
450            map.insert(filename.to_string(), git_commit_scope.clone());
451        }
452
453        // Gitignore and similar files
454        let gitignore_scope = "source.gitignore".to_string();
455        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
456            map.insert(filename.to_string(), gitignore_scope.clone());
457        }
458
459        // Git config files
460        let gitconfig_scope = "source.gitconfig".to_string();
461        for filename in [".gitconfig", ".gitmodules"] {
462            map.insert(filename.to_string(), gitconfig_scope.clone());
463        }
464
465        // Git attributes files
466        let gitattributes_scope = "source.gitattributes".to_string();
467        map.insert(".gitattributes".to_string(), gitattributes_scope);
468
469        // Jenkinsfile -> Groovy
470        let groovy_scope = "source.groovy".to_string();
471        map.insert("Jenkinsfile".to_string(), groovy_scope);
472
473        // Vagrantfile -> Ruby (syntect already handles this, but be explicit)
474        // Brewfile -> Ruby
475        let ruby_scope = "source.ruby".to_string();
476        map.insert("Brewfile".to_string(), ruby_scope);
477
478        // Dockerfile and variants (exact names; Dockerfile.* handled via prefix check)
479        let dockerfile_scope = "source.dockerfile".to_string();
480        map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
481        map.insert("Containerfile".to_string(), dockerfile_scope.clone());
482        // Common Dockerfile variants
483        map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
484        map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
485        map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
486        map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
487
488        // CMake
489        let cmake_scope = "source.cmake".to_string();
490        map.insert("CMakeLists.txt".to_string(), cmake_scope);
491
492        // Starlark/Bazel
493        let starlark_scope = "source.starlark".to_string();
494        map.insert("BUILD".to_string(), starlark_scope.clone());
495        map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
496        map.insert("WORKSPACE".to_string(), starlark_scope.clone());
497        map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
498        map.insert("Tiltfile".to_string(), starlark_scope);
499
500        // Justfile (various casings)
501        let justfile_scope = "source.justfile".to_string();
502        map.insert("justfile".to_string(), justfile_scope.clone());
503        map.insert("Justfile".to_string(), justfile_scope.clone());
504        map.insert(".justfile".to_string(), justfile_scope);
505
506        // EditorConfig -> INI
507        let ini_scope = "source.ini".to_string();
508        map.insert(".editorconfig".to_string(), ini_scope);
509
510        // Earthfile
511        let earthfile_scope = "source.earthfile".to_string();
512        map.insert("Earthfile".to_string(), earthfile_scope);
513
514        // Hyprlang (Hyprland config files)
515        let hyprlang_scope = "source.hyprlang".to_string();
516        map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
517        map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
518        map.insert("hyprlock.conf".to_string(), hyprlang_scope);
519
520        // go.mod / go.sum
521        let gomod_scope = "source.gomod".to_string();
522        map.insert("go.mod".to_string(), gomod_scope.clone());
523        map.insert("go.sum".to_string(), gomod_scope);
524
525        map
526    }
527
528    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
529    pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
530        // TOML grammar
531        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
532            Ok(syntax) => {
533                builder.add(syntax);
534                tracing::debug!("Loaded embedded TOML grammar");
535            }
536            Err(e) => {
537                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
538            }
539        }
540
541        // Odin grammar
542        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
543            Ok(syntax) => {
544                builder.add(syntax);
545                tracing::debug!("Loaded embedded Odin grammar");
546            }
547            Err(e) => {
548                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
549            }
550        }
551
552        // Zig grammar
553        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
554            Ok(syntax) => {
555                builder.add(syntax);
556                tracing::debug!("Loaded embedded Zig grammar");
557            }
558            Err(e) => {
559                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
560            }
561        }
562
563        // Git Rebase Todo grammar
564        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
565            Ok(syntax) => {
566                builder.add(syntax);
567                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
568            }
569            Err(e) => {
570                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
571            }
572        }
573
574        // Git Commit Message grammar
575        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
576        {
577            Ok(syntax) => {
578                builder.add(syntax);
579                tracing::debug!("Loaded embedded Git Commit Message grammar");
580            }
581            Err(e) => {
582                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
583            }
584        }
585
586        // Gitignore grammar
587        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
588            Ok(syntax) => {
589                builder.add(syntax);
590                tracing::debug!("Loaded embedded Gitignore grammar");
591            }
592            Err(e) => {
593                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
594            }
595        }
596
597        // Git Config grammar
598        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
599            Ok(syntax) => {
600                builder.add(syntax);
601                tracing::debug!("Loaded embedded Git Config grammar");
602            }
603            Err(e) => {
604                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
605            }
606        }
607
608        // Git Attributes grammar
609        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
610            Ok(syntax) => {
611                builder.add(syntax);
612                tracing::debug!("Loaded embedded Git Attributes grammar");
613            }
614            Err(e) => {
615                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
616            }
617        }
618
619        // Typst grammar
620        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
621            Ok(syntax) => {
622                builder.add(syntax);
623                tracing::debug!("Loaded embedded Typst grammar");
624            }
625            Err(e) => {
626                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
627            }
628        }
629
630        // Additional embedded grammars for languages not in syntect defaults
631        let additional_grammars: &[(&str, &str)] = &[
632            (DOCKERFILE_GRAMMAR, "Dockerfile"),
633            (INI_GRAMMAR, "INI"),
634            (CMAKE_GRAMMAR, "CMake"),
635            (SCSS_GRAMMAR, "SCSS"),
636            (LESS_GRAMMAR, "LESS"),
637            (POWERSHELL_GRAMMAR, "PowerShell"),
638            (KOTLIN_GRAMMAR, "Kotlin"),
639            (SWIFT_GRAMMAR, "Swift"),
640            (DART_GRAMMAR, "Dart"),
641            (ELIXIR_GRAMMAR, "Elixir"),
642            (FSHARP_GRAMMAR, "FSharp"),
643            (NIX_GRAMMAR, "Nix"),
644            (HCL_GRAMMAR, "HCL"),
645            (PROTOBUF_GRAMMAR, "Protocol Buffers"),
646            (GRAPHQL_GRAMMAR, "GraphQL"),
647            (JULIA_GRAMMAR, "Julia"),
648            (NIM_GRAMMAR, "Nim"),
649            (GLEAM_GRAMMAR, "Gleam"),
650            (VLANG_GRAMMAR, "V"),
651            (SOLIDITY_GRAMMAR, "Solidity"),
652            (KDL_GRAMMAR, "KDL"),
653            (NUSHELL_GRAMMAR, "Nushell"),
654            (STARLARK_GRAMMAR, "Starlark"),
655            (JUSTFILE_GRAMMAR, "Justfile"),
656            (EARTHFILE_GRAMMAR, "Earthfile"),
657            (GOMOD_GRAMMAR, "Go Module"),
658            (VUE_GRAMMAR, "Vue"),
659            (SVELTE_GRAMMAR, "Svelte"),
660            (ASTRO_GRAMMAR, "Astro"),
661            (HYPRLANG_GRAMMAR, "Hyprlang"),
662            (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
663            (RACKET_GRAMMAR, "Racket"),
664            (VERILOG_GRAMMAR, "Verilog"),
665            (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
666            (VHDL_GRAMMAR, "VHDL"),
667        ];
668
669        for (grammar_str, name) in additional_grammars {
670            match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
671                Ok(syntax) => {
672                    builder.add(syntax);
673                    tracing::debug!("Loaded embedded {} grammar", name);
674                }
675                Err(e) => {
676                    tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
677                }
678            }
679        }
680    }
681
682    /// Find syntax for a file by path/extension/filename.
683    ///
684    /// Purely metadata-based — does not read the file. For first-line
685    /// (shebang) fallback, use [`find_by_path`] with a `first_line` argument
686    /// and resolve the returned entry's syntect index.
687    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
688        let entry = self.find_by_path(path, None)?;
689        entry
690            .engines
691            .syntect
692            .map(|i| &self.syntax_set.syntaxes()[i])
693    }
694
695    /// Find syntax by name, with alias resolution.
696    ///
697    /// Thin wrapper around `find_by_name` that returns the associated syntect
698    /// `SyntaxReference`. Tree-sitter-only entries return `None`.
699    ///
700    /// Falls back to a direct syntect lookup for "Plain Text", which the
701    /// catalog deliberately omits but syntect still exposes.
702    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
703        if let Some(entry) = self.find_by_name(name) {
704            if let Some(idx) = entry.engines.syntect {
705                return Some(&self.syntax_set.syntaxes()[idx]);
706            }
707        }
708        // Plain Text is excluded from the catalog (it's not a "grammar" a user
709        // would ever pick), but syntect still stores it and a handful of
710        // callers still ask for it by name.
711        self.syntax_set.find_syntax_by_name(name)
712    }
713
714    // === Alias management ===
715
716    /// Hardcoded short-name aliases for built-in and embedded grammars.
717    ///
718    /// Each entry maps a short name (lowercase) to the exact syntect grammar name.
719    /// Only grammars whose full name differs significantly from a natural short
720    /// form need an entry here. Grammars already short (e.g., "Rust", "Go") are
721    /// reachable via case-insensitive matching and don't need aliases.
722    fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
723        vec![
724            // Syntect built-in grammars with verbose names
725            ("bash", "Bourne Again Shell (bash)"),
726            ("shell", "Bourne Again Shell (bash)"),
727            ("sh", "Bourne Again Shell (bash)"),
728            ("c++", "C++"),
729            ("cpp", "C++"),
730            ("csharp", "C#"),
731            ("objc", "Objective-C"),
732            ("objcpp", "Objective-C++"),
733            ("regex", "Regular Expressions (Python)"),
734            ("regexp", "Regular Expressions (Python)"),
735            // Embedded grammars with multi-word or non-obvious names
736            ("proto", "Protocol Buffers"),
737            ("protobuf", "Protocol Buffers"),
738            ("gomod", "Go Module"),
739            ("git-rebase", "Git Rebase Todo"),
740            ("git-commit", "Git Commit Message"),
741            ("git-config", "Git Config"),
742            ("git-attributes", "Git Attributes"),
743            ("gitignore", "Gitignore"),
744            ("fsharp", "FSharp"),
745            ("f#", "FSharp"),
746            ("terraform", "HCL"),
747            ("tf", "HCL"),
748            ("ts", "TypeScript"),
749            ("js", "JavaScript"),
750            ("py", "Python"),
751            ("rb", "Ruby"),
752            ("rs", "Rust"),
753            ("md", "Markdown"),
754            ("yml", "YAML"),
755            ("dockerfile", "Dockerfile"),
756        ]
757    }
758
759    /// Populate aliases from the built-in table.
760    ///
761    /// Validates that:
762    /// - Each alias target (full name) exists in the syntax set
763    /// - No alias collides (case-insensitive) with an existing grammar full name
764    /// - No duplicate aliases exist
765    pub(crate) fn populate_built_in_aliases(&mut self) {
766        for (short, full) in Self::built_in_aliases() {
767            self.register_alias_inner(short, full, true);
768        }
769        self.rebuild_catalog();
770    }
771
772    /// Register a short-name alias for a grammar.
773    ///
774    /// Returns `true` if the alias was registered, `false` if rejected due to
775    /// collision or missing target. For built-in aliases, collisions panic
776    /// (they indicate a bug). For dynamic aliases, collisions log a warning.
777    ///
778    /// Splices the alias directly into the catalog rather than rebuilding, so
779    /// any user config previously merged via `apply_language_config` is
780    /// preserved. A full rebuild would wipe those entries.
781    pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
782        if !self.register_alias_inner(short_name, full_name, false) {
783            return false;
784        }
785        let short_lower = short_name.to_lowercase();
786        let full_lower = full_name.to_lowercase();
787        if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
788            self.catalog_by_name
789                .entry(short_lower.clone())
790                .or_insert(idx);
791            let entry = &mut self.catalog[idx];
792            let replace = match &entry.short_name {
793                None => true,
794                Some(existing) => short_name.len() < existing.len(),
795            };
796            if replace {
797                entry.short_name = Some(short_lower);
798            }
799        }
800        true
801    }
802
803    fn register_alias_inner(
804        &mut self,
805        short_name: &str,
806        full_name: &str,
807        is_built_in: bool,
808    ) -> bool {
809        let short_lower = short_name.to_lowercase();
810
811        // Validate: target grammar must exist in the syntax set
812        let target_exists = self
813            .syntax_set
814            .syntaxes()
815            .iter()
816            .any(|s| s.name.eq_ignore_ascii_case(full_name));
817        if !target_exists {
818            // Tree-sitter-only targets (e.g. TypeScript) are expected to be
819            // absent from the syntect set. `rebuild_catalog` attaches their
820            // short names via a separate pass over `built_in_aliases()`.
821            if tree_sitter_for_syntect_name(full_name).is_some() {
822                return false;
823            }
824            if is_built_in {
825                // Built-in alias targets should always exist; warn but don't panic
826                // (grammar might have been removed from syntect upstream)
827                tracing::warn!(
828                    "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
829                    short_name, full_name
830                );
831            } else {
832                tracing::warn!(
833                    "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
834                    short_name,
835                    full_name
836                );
837            }
838            return false;
839        }
840
841        // Validate: short name must not collide (case-insensitive) with any grammar full name
842        let collides_with_full_name = self
843            .syntax_set
844            .syntaxes()
845            .iter()
846            .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
847        if collides_with_full_name {
848            // This is actually fine — the short name matches a full name directly,
849            // so find_syntax_by_name's case-insensitive search will find it.
850            // No alias needed.
851            tracing::debug!(
852                "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
853                short_name
854            );
855            return false;
856        }
857
858        // Validate: no duplicate alias (case-insensitive)
859        if let Some(existing_target) = self.aliases.get(&short_lower) {
860            if existing_target.eq_ignore_ascii_case(full_name) {
861                // Same mapping, no-op
862                return true;
863            }
864            let msg = format!(
865                "Alias '{}' already maps to '{}', cannot remap to '{}'",
866                short_name, existing_target, full_name
867            );
868            if is_built_in {
869                panic!("[grammar-alias] Built-in alias collision: {}", msg);
870            } else {
871                tracing::warn!("[grammar-alias] {}", msg);
872                return false;
873            }
874        }
875
876        // Resolve the exact syntect name (preserving original case)
877        let exact_name = self
878            .syntax_set
879            .syntaxes()
880            .iter()
881            .find(|s| s.name.eq_ignore_ascii_case(full_name))
882            .map(|s| s.name.clone())
883            .unwrap();
884
885        self.aliases.insert(short_lower, exact_name);
886        true
887    }
888
889    // === Unified catalog ===
890
891    /// Rebuild the flat catalog of grammar entries.
892    ///
893    /// Called after the syntax set, aliases, or filename scopes change.
894    /// Produces one entry per logical language by merging:
895    /// 1. Every `SyntaxReference` in the syntax set (except "Plain Text")
896    /// 2. Every `fresh_languages::Language` not already covered by a syntect entry
897    /// 3. Alias short-names attached to their target entry
898    /// 4. Filename mappings from `filename_scopes` attached to their scope's entry
899    /// 5. Extra extensions from `user_extensions` attached to their scope's entry
900    ///
901    /// Automatically replays the last `apply_language_config` at the end, so
902    /// user `[languages]` config survives any rebuild.
903    pub(crate) fn rebuild_catalog(&mut self) {
904        // Reverse-map: full_name (lowercase) -> shortest alias.
905        //
906        // Seed from the built-in alias table as well as the live `aliases`
907        // HashMap: the live map only contains aliases whose target exists in
908        // the syntect set, so tree-sitter-only entries (TypeScript) would
909        // otherwise never get their short name ("ts").
910        let mut short_by_full: HashMap<String, String> = HashMap::new();
911        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
912            let key = full.to_lowercase();
913            let keep = match map.get(&key) {
914                None => true,
915                Some(existing) => short.len() < existing.len(),
916            };
917            if keep {
918                map.insert(key, short.to_string());
919            }
920        };
921        for (short, full) in Self::built_in_aliases() {
922            record(&mut short_by_full, short, full);
923        }
924        for (short, full) in &self.aliases {
925            record(&mut short_by_full, short, full);
926        }
927
928        let derive_language_id =
929            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
930                let ts = tree_sitter_for_syntect_name(display_name);
931                let id = ts
932                    .map(|l| l.id().to_string())
933                    .unwrap_or_else(|| display_name.to_lowercase());
934                (id, ts)
935            };
936
937        let mut catalog: Vec<GrammarEntry> = Vec::new();
938        let mut scope_to_index: HashMap<String, usize> = HashMap::new();
939
940        // Syntect-backed entries (skip Plain Text and JavaScript).
941        //
942        // Syntect's `file_extensions` is a hybrid list: real extensions like
943        // "rb" sit alongside bare filenames like "Gemfile", "Rakefile",
944        // "Makefile". Syntect's own `find_syntax_for_file` tries each entry
945        // against the whole filename AND against the path's extension, and
946        // the catalog has to preserve that semantics. We keep everything in
947        // `extensions` here and index each entry as *both* an extension and
948        // a filename at the bottom of this method.
949        //
950        // JavaScript is skipped here so the catalog falls through to the
951        // tree-sitter-only fallback below — the bundled syntect JS grammar
952        // mishandles class fields whose initialiser is an arrow function
953        // returning a template literal (issue #899: state leaks past the
954        // closing backtick and paints the rest of the file as a string).
955        // tree-sitter-javascript parses template literals from the AST and
956        // does not have this failure mode. `find_syntax_by_name("JavaScript")`
957        // still returns syntect's grammar via the catalog's fallback path,
958        // so markdown popup rendering and other code-string highlighters
959        // are unaffected.
960        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
961            if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
962                continue;
963            }
964            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
965            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
966            let source = self
967                .grammar_sources
968                .get(&syntax.name)
969                .map(|info| info.source.clone())
970                .unwrap_or(GrammarSource::BuiltIn);
971            let entry_index = catalog.len();
972            scope_to_index.insert(syntax.scope.to_string(), entry_index);
973
974            // Union syntect's file_extensions with tree-sitter's own
975            // extension list when the entry carries both engines.
976            // tree-sitter-javascript handles `.jsx`/`.mjs`/`.cjs` that
977            // syntect's JS grammar doesn't list, and the old code used to
978            // route those paths to tree-sitter via a separate lookup.
979            let mut extensions = syntax.file_extensions.clone();
980            if let Some(lang) = tree_sitter {
981                for ext in lang.extensions() {
982                    let ext = ext.to_string();
983                    if !extensions.iter().any(|e| e == &ext) {
984                        extensions.push(ext);
985                    }
986                }
987            }
988
989            catalog.push(GrammarEntry {
990                display_name: syntax.name.clone(),
991                language_id,
992                short_name,
993                extensions,
994                filenames: Vec::new(),
995                filename_globs: Vec::new(),
996                source,
997                engines: GrammarEngines {
998                    syntect: Some(idx),
999                    tree_sitter,
1000                },
1001            });
1002        }
1003
1004        // Attach filename_scopes to their entries.
1005        for (filename, scope) in &self.filename_scopes {
1006            if let Some(&idx) = scope_to_index.get(scope) {
1007                if !catalog[idx].filenames.iter().any(|f| f == filename) {
1008                    catalog[idx].filenames.push(filename.clone());
1009                }
1010            }
1011        }
1012
1013        // Attach user_extensions (extra → scope) to their entries.
1014        for (ext, scope) in &self.user_extensions {
1015            if let Some(&idx) = scope_to_index.get(scope) {
1016                if !catalog[idx].extensions.iter().any(|e| e == ext) {
1017                    catalog[idx].extensions.push(ext.clone());
1018                }
1019            }
1020        }
1021
1022        // Ensure every tree-sitter language has an entry. If a syntect entry
1023        // already maps to the same tree-sitter language, skip it; otherwise
1024        // add a tree-sitter-only entry so the catalog is complete (TypeScript
1025        // being the motivating example — syntect ships no grammar for it).
1026        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1027            std::collections::HashSet::new();
1028        for entry in &catalog {
1029            if let Some(lang) = entry.engines.tree_sitter {
1030                ts_covered.insert(lang);
1031            }
1032        }
1033        for lang in fresh_languages::Language::all() {
1034            if ts_covered.contains(lang) {
1035                continue;
1036            }
1037            let display_name = lang.display_name().to_string();
1038            let language_id = lang.id().to_string();
1039            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1040            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1041            catalog.push(GrammarEntry {
1042                display_name,
1043                language_id,
1044                short_name,
1045                extensions,
1046                filenames: Vec::new(),
1047                filename_globs: Vec::new(),
1048                source: GrammarSource::BuiltIn,
1049                engines: GrammarEngines {
1050                    syntect: None,
1051                    tree_sitter: Some(*lang),
1052                },
1053            });
1054        }
1055
1056        // Build name / extension / filename indices.
1057        //
1058        // Every entry in `extensions` gets indexed in BOTH `by_extension`
1059        // (lowercased) AND `by_filename` (exact case) — syntect's
1060        // `file_extensions` list holds both real extensions ("rb") and bare
1061        // filenames ("Gemfile", "Rakefile", "Makefile"). Indexing both ways
1062        // matches syntect's own `find_syntax_for_file` semantics.
1063        let mut by_name: HashMap<String, usize> = HashMap::new();
1064        let mut by_extension: HashMap<String, usize> = HashMap::new();
1065        let mut by_filename: HashMap<String, usize> = HashMap::new();
1066        for (idx, entry) in catalog.iter().enumerate() {
1067            by_name.insert(entry.display_name.to_lowercase(), idx);
1068            by_name.insert(entry.language_id.to_lowercase(), idx);
1069            if let Some(short) = &entry.short_name {
1070                by_name.insert(short.to_lowercase(), idx);
1071            }
1072            for ext in &entry.extensions {
1073                by_extension.entry(ext.to_lowercase()).or_insert(idx);
1074                by_filename.entry(ext.clone()).or_insert(idx);
1075            }
1076            for filename in &entry.filenames {
1077                by_filename.entry(filename.clone()).or_insert(idx);
1078            }
1079        }
1080
1081        self.catalog = catalog;
1082        self.catalog_by_name = by_name;
1083        self.catalog_by_extension = by_extension;
1084        self.catalog_by_filename = by_filename;
1085
1086        // Replay the most recent user config so a rebuild doesn't silently
1087        // wipe out user `[languages]` rules. `take` + restore avoids both a
1088        // clone and a borrow checker fight with `apply_language_config_inner`.
1089        if !self.applied_language_config.is_empty() {
1090            let cfg = std::mem::take(&mut self.applied_language_config);
1091            self.apply_language_config_inner(&cfg);
1092            self.applied_language_config = cfg;
1093        }
1094        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1095    }
1096
1097    /// Return the full catalog of grammar entries.
1098    pub fn catalog(&self) -> &[GrammarEntry] {
1099        &self.catalog
1100    }
1101
1102    /// Monotonic generation, bumped on every catalog mutation. Compare against
1103    /// a previously-observed value to decide whether to recompute derived
1104    /// state.
1105    pub fn catalog_gen(&self) -> u64 {
1106        self.catalog_gen
1107    }
1108
1109    /// Look up a grammar entry by display name, language ID, or short alias
1110    /// (case-insensitive). All aliases — built-in and user-config-declared —
1111    /// are indexed directly in `catalog_by_name` during `rebuild_catalog` /
1112    /// `register_alias` / `apply_language_config`, so a single lookup covers
1113    /// every case.
1114    pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1115        self.catalog_by_name
1116            .get(&name.to_lowercase())
1117            .map(|&idx| &self.catalog[idx])
1118    }
1119
1120    /// Look up a grammar entry by file path, with optional first-line content
1121    /// for shebang / `first_line_match` detection.
1122    ///
1123    /// Resolution order:
1124    /// 1. Exact filename (config-declared filenames and filename_scopes live here)
1125    /// 2. Glob patterns from user config (e.g. "*.conf", "/etc/**/rc.*")
1126    /// 3. File extension
1127    /// 4. Shebang / first-line regex match on `first_line` if supplied
1128    ///
1129    /// Globs take priority over extension so a user rule like `*.conf → bash`
1130    /// wins over any built-in extension match on `.conf`. The first-line
1131    /// fallback (#4) is last so catalog matches stay authoritative — syntect
1132    /// might otherwise misclassify `.fish` as bash via its first-line
1133    /// regexes.
1134    ///
1135    /// The first-line fallback is pure: it runs syntect's
1136    /// `find_syntax_by_first_line` regex cache against the caller-supplied
1137    /// string. The registry never touches the filesystem — the caller (who
1138    /// already loaded the buffer via the `FileSystem` trait) must extract
1139    /// the first line and pass it in.
1140    pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1141        let filename = path.file_name().and_then(|n| n.to_str());
1142        let path_str = path.to_str().unwrap_or("");
1143
1144        if let Some(name) = filename {
1145            if let Some(&idx) = self.catalog_by_filename.get(name) {
1146                return Some(&self.catalog[idx]);
1147            }
1148        }
1149
1150        // Glob walk — filenames with globs are rare so linear scan is fine.
1151        if let Some(name) = filename {
1152            for entry in &self.catalog {
1153                for pattern in &entry.filename_globs {
1154                    let matched = if is_path_pattern(pattern) {
1155                        path_glob_matches(pattern, path_str)
1156                    } else {
1157                        filename_glob_matches(pattern, name)
1158                    };
1159                    if matched {
1160                        return Some(entry);
1161                    }
1162                }
1163            }
1164        }
1165
1166        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1167            if let Some(entry) = self.find_by_extension(ext) {
1168                return Some(entry);
1169            }
1170        }
1171
1172        // Last resort: shebang / first-line regex match against the
1173        // caller-supplied content. Map the matched syntect grammar back to a
1174        // catalog entry by name — every syntect syntax has a catalog entry,
1175        // so this round-trip preserves tree-sitter attachment.
1176        let line = first_line?;
1177        let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1178        self.find_by_name(&syntax.name)
1179    }
1180
1181    /// Look up a grammar entry by file extension (case-insensitive, without dot).
1182    pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1183        self.catalog_by_extension
1184            .get(&ext.to_lowercase())
1185            .map(|&idx| &self.catalog[idx])
1186    }
1187
1188    /// Merge user `[languages]` config into the catalog.
1189    ///
1190    /// For each config entry, resolves its grammar to an existing catalog entry
1191    /// (by grammar name or by language id). Extensions are added and override
1192    /// the ext→entry index so config wins over built-in mappings. Filenames are
1193    /// split into exact matches (indexed) and globs (walked at lookup time).
1194    ///
1195    /// If no existing entry matches, a new engine-less entry is created so the
1196    /// language still appears in the palette.
1197    ///
1198    /// Idempotent. The config is cached on the registry so `rebuild_catalog`
1199    /// can replay it — callers don't need to re-apply after a rebuild.
1200    pub fn apply_language_config(
1201        &mut self,
1202        languages: &HashMap<String, crate::config::LanguageConfig>,
1203    ) {
1204        self.applied_language_config = languages.clone();
1205        self.apply_language_config_inner(languages);
1206        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1207    }
1208
1209    /// Do the actual catalog splicing without touching
1210    /// `applied_language_config`. Called from `apply_language_config` (which
1211    /// records the input) and from `rebuild_catalog` (which replays the
1212    /// cached input after wiping the catalog).
1213    fn apply_language_config_inner(
1214        &mut self,
1215        languages: &HashMap<String, crate::config::LanguageConfig>,
1216    ) {
1217        for (lang_id, lang_cfg) in languages {
1218            let grammar_name = if lang_cfg.grammar.is_empty() {
1219                lang_id.as_str()
1220            } else {
1221                lang_cfg.grammar.as_str()
1222            };
1223
1224            // Resolve to an existing entry; fall back to creating one.
1225            let idx = self
1226                .catalog_by_name
1227                .get(&grammar_name.to_lowercase())
1228                .copied()
1229                .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1230                .unwrap_or_else(|| {
1231                    let idx = self.catalog.len();
1232                    self.catalog.push(GrammarEntry {
1233                        display_name: lang_id.clone(),
1234                        language_id: lang_id.clone(),
1235                        short_name: None,
1236                        extensions: Vec::new(),
1237                        filenames: Vec::new(),
1238                        filename_globs: Vec::new(),
1239                        source: GrammarSource::BuiltIn,
1240                        engines: GrammarEngines::default(),
1241                    });
1242                    idx
1243                });
1244
1245            // Always index the config key so `find_by_name("mylang")` resolves
1246            // even when `mylang` aliases an existing grammar (e.g.
1247            // `[languages.mylang] grammar = "Rust"`). `or_insert` preserves
1248            // any existing mapping — won't clobber the canonical entry.
1249            self.catalog_by_name
1250                .entry(lang_id.to_lowercase())
1251                .or_insert(idx);
1252
1253            for ext in &lang_cfg.extensions {
1254                if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1255                    self.catalog[idx].extensions.push(ext.clone());
1256                }
1257                // Config-declared extensions override any previous mapping.
1258                self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1259            }
1260            for filename in &lang_cfg.filenames {
1261                if is_glob_pattern(filename) {
1262                    if !self.catalog[idx]
1263                        .filename_globs
1264                        .iter()
1265                        .any(|f| f == filename)
1266                    {
1267                        self.catalog[idx].filename_globs.push(filename.clone());
1268                    }
1269                } else {
1270                    if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1271                        self.catalog[idx].filenames.push(filename.clone());
1272                    }
1273                    self.catalog_by_filename.insert(filename.clone(), idx);
1274                }
1275            }
1276        }
1277    }
1278
1279    /// Get the underlying syntax set
1280    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1281        &self.syntax_set
1282    }
1283
1284    /// Get a clone of the Arc for sharing
1285    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1286        Arc::clone(&self.syntax_set)
1287    }
1288
1289    /// List all available syntax names
1290    pub fn available_syntaxes(&self) -> Vec<&str> {
1291        self.syntax_set
1292            .syntaxes()
1293            .iter()
1294            .map(|s| s.name.as_str())
1295            .collect()
1296    }
1297
1298    /// List all available grammars with provenance information.
1299    ///
1300    /// Returns a sorted list of `GrammarInfo` entries derived from the unified
1301    /// catalog — this includes both syntect grammars and tree-sitter-only
1302    /// languages (like TypeScript). Each entry is listed exactly once even
1303    /// when both engines can serve it.
1304    pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1305        let mut result: Vec<GrammarInfo> = self
1306            .catalog
1307            .iter()
1308            .map(|entry| GrammarInfo {
1309                name: entry.display_name.clone(),
1310                source: entry.source.clone(),
1311                file_extensions: entry.extensions.clone(),
1312                short_name: entry.short_name.clone(),
1313            })
1314            .collect();
1315        result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1316        result
1317    }
1318
1319    /// Get the grammar sources map.
1320    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1321        &self.grammar_sources
1322    }
1323
1324    /// Build grammar source info from a pre-compiled syntax set.
1325    ///
1326    /// All grammars in the packdump (syntect defaults + embedded) are tagged as built-in.
1327    pub(crate) fn build_grammar_sources_from_syntax_set(
1328        syntax_set: &SyntaxSet,
1329    ) -> HashMap<String, GrammarInfo> {
1330        let mut sources = HashMap::new();
1331        for syntax in syntax_set.syntaxes() {
1332            sources.insert(
1333                syntax.name.clone(),
1334                GrammarInfo {
1335                    name: syntax.name.clone(),
1336                    source: GrammarSource::BuiltIn,
1337                    file_extensions: syntax.file_extensions.clone(),
1338                    short_name: None,
1339                },
1340            );
1341        }
1342        sources
1343    }
1344
1345    /// Get the user extensions mapping (extension -> scope name).
1346    #[cfg(test)]
1347    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1348        &self.user_extensions
1349    }
1350
1351    /// Get the loaded grammar paths (for deduplication in flush_pending_grammars).
1352    #[cfg(test)]
1353    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1354        &self.loaded_grammar_paths
1355    }
1356
1357    /// Create a new registry with additional grammar files
1358    ///
1359    /// This builds a new GrammarRegistry that includes all grammars from
1360    /// the base registry plus the additional grammars specified.
1361    /// Uses the base registry's syntax_set as the builder base, preserving
1362    /// all existing grammars (user grammars, language packs, etc.).
1363    ///
1364    /// # Arguments
1365    /// * `base` - The base registry to extend
1366    /// * `additional` - List of (language, path, extensions) tuples for new grammars
1367    ///
1368    /// # Returns
1369    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
1370    pub fn with_additional_grammars(
1371        base: &GrammarRegistry,
1372        additional: &[GrammarSpec],
1373    ) -> Option<Self> {
1374        tracing::info!(
1375            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1376            additional.len(),
1377            base.syntax_set.syntaxes().len()
1378        );
1379
1380        // Use the base registry's syntax_set as builder base — this preserves
1381        // ALL existing grammars (defaults, embedded, user, language packs)
1382        // without needing to reload them from disk.
1383        let mut builder = (*base.syntax_set).clone().into_builder();
1384
1385        // Preserve existing user extensions and add new ones
1386        let mut user_extensions = base.user_extensions.clone();
1387
1388        // Track loaded grammar paths (existing + new)
1389        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1390
1391        // Preserve existing grammar sources
1392        let mut grammar_sources = base.grammar_sources.clone();
1393
1394        // Add each new grammar
1395        for spec in additional {
1396            tracing::info!(
1397                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1398                spec.language,
1399                spec.path,
1400                spec.extensions
1401            );
1402            match Self::load_grammar_file(&spec.path) {
1403                Ok(syntax) => {
1404                    let scope = syntax.scope.to_string();
1405                    let syntax_name = syntax.name.clone();
1406                    tracing::info!(
1407                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1408                        syntax_name,
1409                        scope
1410                    );
1411                    builder.add(syntax);
1412                    tracing::info!(
1413                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
1414                        spec.language,
1415                        spec.path,
1416                        spec.extensions
1417                    );
1418                    // Register extensions for this grammar
1419                    for ext in &spec.extensions {
1420                        user_extensions.insert(ext.clone(), scope.clone());
1421                    }
1422                    // Track provenance
1423                    grammar_sources.insert(
1424                        syntax_name.clone(),
1425                        GrammarInfo {
1426                            name: syntax_name,
1427                            source: GrammarSource::Plugin {
1428                                plugin: spec.language.clone(),
1429                                path: spec.path.clone(),
1430                            },
1431                            file_extensions: spec.extensions.clone(),
1432                            short_name: None,
1433                        },
1434                    );
1435                    // Track this grammar path for future reloads
1436                    loaded_grammar_paths.push(spec.clone());
1437                }
1438                Err(e) => {
1439                    tracing::warn!(
1440                        "Failed to load grammar for '{}' from {:?}: {}",
1441                        spec.language,
1442                        spec.path,
1443                        e
1444                    );
1445                }
1446            }
1447        }
1448
1449        let mut reg = Self {
1450            syntax_set: Arc::new(builder.build()),
1451            user_extensions,
1452            filename_scopes: base.filename_scopes.clone(),
1453            loaded_grammar_paths,
1454            grammar_sources,
1455            aliases: base.aliases.clone(),
1456            catalog: Vec::new(),
1457            catalog_by_name: HashMap::new(),
1458            catalog_by_extension: HashMap::new(),
1459            catalog_by_filename: HashMap::new(),
1460            applied_language_config: HashMap::new(),
1461            catalog_gen: 0,
1462        };
1463        reg.rebuild_catalog();
1464        Some(reg)
1465    }
1466
1467    /// Load a grammar file from disk
1468    ///
1469    /// Only Sublime Text (.sublime-syntax) format is supported.
1470    /// TextMate (.tmLanguage) grammars use a completely different format
1471    /// and cannot be loaded by syntect's yaml-load feature.
1472    pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1473        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1474
1475        match ext {
1476            "sublime-syntax" => {
1477                let content = std::fs::read_to_string(path)
1478                    .map_err(|e| format!("Failed to read file: {}", e))?;
1479                SyntaxDefinition::load_from_str(
1480                    &content,
1481                    true,
1482                    path.file_stem().and_then(|s| s.to_str()),
1483                )
1484                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1485            }
1486            _ => Err(format!(
1487                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1488                ext
1489            )),
1490        }
1491    }
1492}
1493
1494impl Default for GrammarRegistry {
1495    fn default() -> Self {
1496        // Create with defaults and embedded grammars only (no user grammars)
1497        let defaults = SyntaxSet::load_defaults_newlines();
1498        let mut builder = defaults.into_builder();
1499        Self::add_embedded_grammars(&mut builder);
1500        let syntax_set = builder.build();
1501        let filename_scopes = Self::build_filename_scopes();
1502        let extra_extensions = Self::build_extra_extensions();
1503
1504        let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1505        registry.populate_built_in_aliases();
1506        registry.rebuild_catalog();
1507        registry
1508    }
1509}
1510
1511// VSCode package.json structures for parsing grammar manifests
1512
/// Top-level shape of a VSCode-style `package.json` manifest, limited to the
/// single key this type deserializes.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The manifest's `contributes` section; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1518
/// The `contributes` section of a manifest: the languages and grammars it declares.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Entries under `languages`; an absent key deserializes to an empty list.
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Entries under `grammars`; an absent key deserializes to an empty list.
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1526
/// One entry of a manifest's `contributes.languages` list.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required key `id`).
    pub id: String,
    /// File extensions declared for the language; an absent key deserializes
    /// to an empty list.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1533
/// One entry of a manifest's `contributes.grammars` list. All three keys are
/// required for deserialization to succeed.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Identifier of the language this grammar applies to.
    pub language: String,
    /// Scope name of the grammar, read from the manifest key `scopeName`.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Location of the grammar file as written in the manifest.
    /// NOTE(review): presumably relative to the manifest's directory — verify
    /// against the code that consumes it.
    pub path: String,
}
1541
1542#[cfg(test)]
1543mod tests {
1544    use super::*;
1545
1546    #[test]
1547    fn test_empty_registry() {
1548        let registry = GrammarRegistry::empty();
1549        // Should have at least plain text
1550        assert!(!registry.available_syntaxes().is_empty());
1551    }
1552
1553    #[test]
1554    fn test_default_registry() {
1555        let registry = GrammarRegistry::default();
1556        // Should have built-in syntaxes
1557        assert!(!registry.available_syntaxes().is_empty());
1558    }
1559
1560    #[test]
1561    fn test_find_syntax_for_common_extensions() {
1562        let registry = GrammarRegistry::default();
1563
1564        // Test common extensions that resolve to a syntect (TextMate) grammar
1565        // via the catalog. JavaScript is intentionally NOT here — it is routed
1566        // exclusively to tree-sitter (issue #899) and so has no catalog-level
1567        // syntect entry. Code-block highlighting in popups still finds the
1568        // syntect JS grammar through `SyntaxSet::find_syntax_by_token`, which
1569        // bypasses the catalog.
1570        let test_cases = [
1571            ("test.py", true),
1572            ("test.rs", true),
1573            ("test.js", false),
1574            ("test.json", true),
1575            ("test.md", true),
1576            ("test.html", true),
1577            ("test.css", true),
1578            ("test.unknown_extension_xyz", false),
1579        ];
1580
1581        for (filename, should_exist) in test_cases {
1582            let path = Path::new(filename);
1583            let result = registry.find_syntax_for_file(path);
1584            assert_eq!(
1585                result.is_some(),
1586                should_exist,
1587                "Expected {:?} for {}",
1588                should_exist,
1589                filename
1590            );
1591        }
1592    }
1593
1594    #[test]
1595    fn test_racket_grammar_loaded() {
1596        let registry = GrammarRegistry::default();
1597        for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1598            let result = registry.find_syntax_for_file(Path::new(filename));
1599            assert!(
1600                result.is_some(),
1601                "Racket grammar should be available for {}",
1602                filename
1603            );
1604            let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1605            assert_eq!(entry.display_name, "Racket", "for {}", filename);
1606        }
1607    }
1608
1609    #[test]
1610    fn test_syntax_set_arc() {
1611        let registry = GrammarRegistry::default();
1612        let arc1 = registry.syntax_set_arc();
1613        let arc2 = registry.syntax_set_arc();
1614        // Both should point to the same data
1615        assert!(Arc::ptr_eq(&arc1, &arc2));
1616    }
1617
1618    #[test]
1619    fn test_shell_dotfiles_detection() {
1620        let registry = GrammarRegistry::default();
1621
1622        // All these should be detected as shell scripts
1623        let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1624
1625        for filename in shell_files {
1626            let path = Path::new(filename);
1627            let result = registry.find_syntax_for_file(path);
1628            assert!(
1629                result.is_some(),
1630                "{} should be detected as a syntax",
1631                filename
1632            );
1633            let syntax = result.unwrap();
1634            // Should be detected as Bash/Shell
1635            assert!(
1636                syntax.name.to_lowercase().contains("bash")
1637                    || syntax.name.to_lowercase().contains("shell"),
1638                "{} should be detected as shell/bash, got: {}",
1639                filename,
1640                syntax.name
1641            );
1642        }
1643    }
1644
1645    #[test]
1646    fn test_pkgbuild_detection() {
1647        let registry = GrammarRegistry::default();
1648
1649        // PKGBUILD and APKBUILD should be detected as shell scripts
1650        for filename in ["PKGBUILD", "APKBUILD"] {
1651            let path = Path::new(filename);
1652            let result = registry.find_syntax_for_file(path);
1653            assert!(
1654                result.is_some(),
1655                "{} should be detected as a syntax",
1656                filename
1657            );
1658            let syntax = result.unwrap();
1659            // Should be detected as Bash/Shell
1660            assert!(
1661                syntax.name.to_lowercase().contains("bash")
1662                    || syntax.name.to_lowercase().contains("shell"),
1663                "{} should be detected as shell/bash, got: {}",
1664                filename,
1665                syntax.name
1666            );
1667        }
1668    }
1669
1670    #[test]
1671    fn test_find_syntax_with_glob_filenames() {
1672        let mut registry = GrammarRegistry::default();
1673        let mut languages = std::collections::HashMap::new();
1674        languages.insert(
1675            "shell-configs".to_string(),
1676            crate::config::LanguageConfig {
1677                extensions: vec!["sh".to_string()],
1678                filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1679                grammar: "bash".to_string(),
1680                comment_prefix: Some("#".to_string()),
1681                auto_indent: true,
1682                auto_close: None,
1683                auto_surround: None,
1684                textmate_grammar: None,
1685                show_whitespace_tabs: true,
1686                line_wrap: None,
1687                wrap_column: None,
1688                page_view: None,
1689                page_width: None,
1690                use_tabs: None,
1691                tab_size: None,
1692                formatter: None,
1693                format_on_save: false,
1694                on_save: vec![],
1695                word_characters: None,
1696            },
1697        );
1698        registry.apply_language_config(&languages);
1699
1700        assert!(
1701            registry
1702                .find_by_path(Path::new("nftables.conf"), None)
1703                .is_some(),
1704            "*.conf should match nftables.conf"
1705        );
1706        assert!(
1707            registry.find_by_path(Path::new("lfrc"), None).is_some(),
1708            "*rc should match lfrc"
1709        );
1710        // Unrelated file shouldn't panic.
1711        let _ = registry.find_by_path(Path::new("randomfile"), None);
1712    }
1713
1714    #[test]
1715    fn test_find_syntax_with_path_glob_filenames() {
1716        let mut registry = GrammarRegistry::default();
1717        let mut languages = std::collections::HashMap::new();
1718        languages.insert(
1719            "shell-configs".to_string(),
1720            crate::config::LanguageConfig {
1721                extensions: vec!["sh".to_string()],
1722                filenames: vec!["/etc/**/rc.*".to_string()],
1723                grammar: "bash".to_string(),
1724                comment_prefix: Some("#".to_string()),
1725                auto_indent: true,
1726                auto_close: None,
1727                auto_surround: None,
1728                textmate_grammar: None,
1729                show_whitespace_tabs: true,
1730                line_wrap: None,
1731                wrap_column: None,
1732                page_view: None,
1733                page_width: None,
1734                use_tabs: None,
1735                tab_size: None,
1736                formatter: None,
1737                format_on_save: false,
1738                on_save: vec![],
1739                word_characters: None,
1740            },
1741        );
1742        registry.apply_language_config(&languages);
1743
1744        assert!(
1745            registry
1746                .find_by_path(Path::new("/etc/rc.conf"), None)
1747                .is_some(),
1748            "/etc/**/rc.* should match /etc/rc.conf"
1749        );
1750        assert!(
1751            registry
1752                .find_by_path(Path::new("/etc/init/rc.local"), None)
1753                .is_some(),
1754            "/etc/**/rc.* should match /etc/init/rc.local"
1755        );
1756        let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1757    }
1758
1759    #[test]
1760    fn test_exact_filename_takes_priority_over_glob() {
1761        let mut registry = GrammarRegistry::default();
1762        let mut languages = std::collections::HashMap::new();
1763
1764        // A language with exact filename "lfrc" -> python grammar
1765        languages.insert(
1766            "custom-lfrc".to_string(),
1767            crate::config::LanguageConfig {
1768                extensions: vec![],
1769                filenames: vec!["lfrc".to_string()],
1770                grammar: "python".to_string(),
1771                comment_prefix: Some("#".to_string()),
1772                auto_indent: true,
1773                auto_close: None,
1774                auto_surround: None,
1775                textmate_grammar: None,
1776                show_whitespace_tabs: true,
1777                line_wrap: None,
1778                wrap_column: None,
1779                page_view: None,
1780                page_width: None,
1781                use_tabs: None,
1782                tab_size: None,
1783                formatter: None,
1784                format_on_save: false,
1785                on_save: vec![],
1786                word_characters: None,
1787            },
1788        );
1789
1790        // A language with glob "*rc" -> bash grammar
1791        languages.insert(
1792            "rc-files".to_string(),
1793            crate::config::LanguageConfig {
1794                extensions: vec![],
1795                filenames: vec!["*rc".to_string()],
1796                grammar: "bash".to_string(),
1797                comment_prefix: Some("#".to_string()),
1798                auto_indent: true,
1799                auto_close: None,
1800                auto_surround: None,
1801                textmate_grammar: None,
1802                show_whitespace_tabs: true,
1803                line_wrap: None,
1804                wrap_column: None,
1805                page_view: None,
1806                page_width: None,
1807                use_tabs: None,
1808                tab_size: None,
1809                formatter: None,
1810                format_on_save: false,
1811                on_save: vec![],
1812                word_characters: None,
1813            },
1814        );
1815
1816        registry.apply_language_config(&languages);
1817
1818        // "lfrc" should match the exact rule (python), not the glob (bash)
1819        let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1820        assert!(
1821            entry.display_name.to_lowercase().contains("python"),
1822            "exact match should win over glob, got: {}",
1823            entry.display_name
1824        );
1825    }
1826
1827    #[test]
1828    fn test_built_in_aliases_resolve() {
1829        let registry = GrammarRegistry::default();
1830
1831        // "bash" should resolve to "Bourne Again Shell (bash)" via alias
1832        let syntax = registry.find_syntax_by_name("bash");
1833        assert!(syntax.is_some(), "alias 'bash' should resolve");
1834        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1835
1836        // "cpp" should resolve to "C++"
1837        let syntax = registry.find_syntax_by_name("cpp");
1838        assert!(syntax.is_some(), "alias 'cpp' should resolve");
1839        assert_eq!(syntax.unwrap().name, "C++");
1840
1841        // "csharp" should resolve to "C#"
1842        let syntax = registry.find_syntax_by_name("csharp");
1843        assert!(syntax.is_some(), "alias 'csharp' should resolve");
1844        assert_eq!(syntax.unwrap().name, "C#");
1845
1846        // "sh" should also resolve to bash
1847        let syntax = registry.find_syntax_by_name("sh");
1848        assert!(syntax.is_some(), "alias 'sh' should resolve");
1849        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1850
1851        // "proto" should resolve to "Protocol Buffers"
1852        let syntax = registry.find_syntax_by_name("proto");
1853        assert!(syntax.is_some(), "alias 'proto' should resolve");
1854        assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1855    }
1856
1857    #[test]
1858    fn test_alias_case_insensitive_input() {
1859        let registry = GrammarRegistry::default();
1860
1861        // Aliases should be case-insensitive on input
1862        let syntax = registry.find_syntax_by_name("BASH");
1863        assert!(
1864            syntax.is_some(),
1865            "alias 'BASH' should resolve case-insensitively"
1866        );
1867        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1868
1869        let syntax = registry.find_syntax_by_name("Cpp");
1870        assert!(
1871            syntax.is_some(),
1872            "alias 'Cpp' should resolve case-insensitively"
1873        );
1874        assert_eq!(syntax.unwrap().name, "C++");
1875    }
1876
1877    #[test]
1878    fn test_full_name_still_works() {
1879        let registry = GrammarRegistry::default();
1880
1881        // Full names should still work (exact match)
1882        let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1883        assert!(syntax.is_some(), "full name should still resolve");
1884        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1885
1886        // Case-insensitive full name should still work
1887        let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1888        assert!(
1889            syntax.is_some(),
1890            "case-insensitive full name should resolve"
1891        );
1892        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1893    }
1894
1895    #[test]
1896    fn test_alias_does_not_shadow_full_names() {
1897        let registry = GrammarRegistry::default();
1898
1899        // "Rust" should resolve directly via case-insensitive match, not via alias
1900        let syntax = registry.find_syntax_by_name("rust");
1901        assert!(syntax.is_some());
1902        assert_eq!(syntax.unwrap().name, "Rust");
1903
1904        // "Go" should resolve directly
1905        let syntax = registry.find_syntax_by_name("go");
1906        assert!(syntax.is_some());
1907        assert_eq!(syntax.unwrap().name, "Go");
1908    }
1909
1910    #[test]
1911    fn test_register_alias_rejects_collision() {
1912        let mut registry = GrammarRegistry::default();
1913
1914        // Trying to register an alias that maps to two different targets should fail
1915        assert!(registry.register_alias("myalias", "Rust"));
1916        assert!(!registry.register_alias("myalias", "Go"));
1917
1918        // Same mapping is fine (idempotent)
1919        assert!(registry.register_alias("myalias", "Rust"));
1920    }
1921
1922    #[test]
1923    fn test_register_alias_rejects_nonexistent_target() {
1924        let mut registry = GrammarRegistry::default();
1925        assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1926    }
1927
1928    #[test]
1929    fn test_register_alias_skips_existing_grammar_name() {
1930        let mut registry = GrammarRegistry::default();
1931
1932        // "rust" case-insensitively matches the grammar "Rust", so no alias needed
1933        assert!(!registry.register_alias("rust", "Rust"));
1934        // Should still be resolvable via case-insensitive match
1935        assert!(registry.find_syntax_by_name("rust").is_some());
1936    }
1937
1938    #[test]
1939    fn test_available_grammar_info_includes_short_names() {
1940        let registry = GrammarRegistry::default();
1941        let infos = registry.available_grammar_info();
1942
1943        let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1944        assert!(bash_info.is_some(), "bash grammar should be in the list");
1945        let bash_info = bash_info.unwrap();
1946        assert!(
1947            bash_info.short_name.is_some(),
1948            "bash grammar should have a short_name"
1949        );
1950        // The shortest alias for bash is "sh"
1951        assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1952    }
1953
1954    #[test]
1955    fn test_catalog_contains_each_language_once() {
1956        let registry = GrammarRegistry::default();
1957        let catalog = registry.catalog();
1958
1959        // Every catalog entry must have a unique (case-insensitive) display name.
1960        let mut seen = std::collections::HashSet::new();
1961        for entry in catalog {
1962            let key = entry.display_name.to_lowercase();
1963            assert!(
1964                seen.insert(key.clone()),
1965                "duplicate catalog entry for display_name={:?}",
1966                entry.display_name
1967            );
1968        }
1969
1970        // TypeScript is tree-sitter-only (syntect ships no grammar for it) yet
1971        // must still appear in the catalog.
1972        let ts = registry
1973            .find_by_name("TypeScript")
1974            .expect("TypeScript must be in the catalog");
1975        assert!(ts.engines.syntect.is_none());
1976        assert_eq!(
1977            ts.engines.tree_sitter,
1978            Some(fresh_languages::Language::TypeScript)
1979        );
1980        assert_eq!(ts.language_id, "typescript");
1981        assert!(ts.extensions.iter().any(|e| e == "ts"));
1982
1983        // Languages that exist in both syntect and tree-sitter (Rust, Python)
1984        // must appear exactly once and prefer the syntect engine.
1985        for name in ["Rust", "Python"] {
1986            let entry = registry
1987                .find_by_name(name)
1988                .unwrap_or_else(|| panic!("{} must be in the catalog", name));
1989            assert!(
1990                entry.engines.syntect.is_some(),
1991                "{} should have a syntect index",
1992                name
1993            );
1994            assert!(
1995                entry.engines.tree_sitter.is_some(),
1996                "{} should also have a tree-sitter language",
1997                name
1998            );
1999            // Only one entry with this display name (already checked above),
2000            // but also verify language_id lookup lands on the same entry.
2001            let by_id = registry
2002                .find_by_name(&entry.language_id)
2003                .expect("language_id should resolve");
2004            assert_eq!(by_id.display_name, entry.display_name);
2005        }
2006
2007        // JavaScript is deliberately routed to tree-sitter only — the
2008        // bundled syntect JavaScript grammar mishandles certain template
2009        // literals and bleeds string state into the rest of the file
2010        // (issue #899). The catalog must therefore expose a tree-sitter-only
2011        // entry, even though syntect ships a JavaScript grammar.
2012        let js = registry
2013            .find_by_name("JavaScript")
2014            .expect("JavaScript must be in the catalog");
2015        assert!(
2016            js.engines.syntect.is_none(),
2017            "JavaScript must not be routed to the syntect engine (issue #899)"
2018        );
2019        assert_eq!(
2020            js.engines.tree_sitter,
2021            Some(fresh_languages::Language::JavaScript),
2022            "JavaScript must carry the tree-sitter language"
2023        );
2024    }
2025
2026    #[test]
2027    fn test_catalog_find_by_path_and_extension() {
2028        let registry = GrammarRegistry::default();
2029        let ts = registry
2030            .find_by_path(Path::new("foo.ts"), None)
2031            .expect("foo.ts should resolve");
2032        assert_eq!(ts.display_name, "TypeScript");
2033        let rs = registry.find_by_extension("rs").expect("rs should resolve");
2034        assert_eq!(rs.display_name, "Rust");
2035    }
2036
2037    /// Build a minimal LanguageConfig for tests.
2038    fn lang_cfg(
2039        grammar: &str,
2040        extensions: &[&str],
2041        filenames: &[&str],
2042    ) -> crate::config::LanguageConfig {
2043        crate::config::LanguageConfig {
2044            extensions: extensions.iter().map(|s| s.to_string()).collect(),
2045            filenames: filenames.iter().map(|s| s.to_string()).collect(),
2046            grammar: grammar.to_string(),
2047            comment_prefix: None,
2048            auto_indent: true,
2049            auto_close: None,
2050            auto_surround: None,
2051            textmate_grammar: None,
2052            show_whitespace_tabs: true,
2053            line_wrap: None,
2054            wrap_column: None,
2055            page_view: None,
2056            page_width: None,
2057            use_tabs: None,
2058            tab_size: None,
2059            formatter: None,
2060            format_on_save: false,
2061            on_save: vec![],
2062            word_characters: None,
2063        }
2064    }
2065
2066    /// Bug #1: a user-declared config key that aliases an existing grammar
2067    /// (e.g. `[languages.mylang] grammar = "Rust"`) must resolve via
2068    /// `find_by_name("mylang")` so the language palette can select it.
2069    #[test]
2070    fn test_user_alias_resolves_via_find_by_name() {
2071        let mut registry = GrammarRegistry::default();
2072        let mut languages = std::collections::HashMap::new();
2073        languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2074        registry.apply_language_config(&languages);
2075
2076        let entry = registry
2077            .find_by_name("mylang")
2078            .expect("user-declared alias 'mylang' must resolve");
2079        assert_eq!(entry.display_name, "Rust");
2080    }
2081
2082    /// Bug #2: `register_alias` used to rebuild the catalog from scratch,
2083    /// wiping out everything `apply_language_config` had merged. Registering
2084    /// an alias afterwards must not lose user config.
2085    #[test]
2086    fn test_register_alias_preserves_applied_language_config() {
2087        let mut registry = GrammarRegistry::default();
2088        let mut languages = std::collections::HashMap::new();
2089        languages.insert(
2090            "shell-configs".to_string(),
2091            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2092        );
2093        registry.apply_language_config(&languages);
2094
2095        // Sanity: config applied.
2096        assert!(registry.find_by_extension("myconf").is_some());
2097        assert!(
2098            registry
2099                .find_by_path(Path::new("foo.myconf"), None)
2100                .is_some(),
2101            "glob should match before register_alias"
2102        );
2103
2104        // Registering an alias must not erase the config we just applied.
2105        registry.register_alias("mycustom", "Rust");
2106
2107        assert!(
2108            registry.find_by_extension("myconf").is_some(),
2109            "config extension must survive register_alias"
2110        );
2111        assert!(
2112            registry
2113                .find_by_path(Path::new("foo.myconf"), None)
2114                .is_some(),
2115            "glob must survive register_alias"
2116        );
2117    }
2118
2119    /// Bug #4: `from_syntax_name` used to unconditionally overwrite the
2120    /// catalog's canonical display name with whatever the user typed (e.g.
2121    /// "BASH") — that string ended up in the status bar.
2122    #[test]
2123    fn test_from_syntax_name_preserves_canonical_display_name() {
2124        use crate::primitives::detected_language::DetectedLanguage;
2125        let registry = GrammarRegistry::default();
2126        let languages = std::collections::HashMap::new();
2127
2128        let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
2129            .expect("BASH should resolve via alias");
2130        assert_eq!(
2131            detected.display_name, "Bourne Again Shell (bash)",
2132            "display_name must be canonical, not user-typed"
2133        );
2134    }
2135
2136    /// A config-only language (no matching syntect grammar) must still appear
2137    /// in the catalog so the language palette can offer it — the old
2138    /// `DetectedLanguage::from_config_language` branch was load-bearing.
2139    #[test]
2140    fn test_config_only_language_appears_in_catalog() {
2141        let mut registry = GrammarRegistry::default();
2142        let mut languages = std::collections::HashMap::new();
2143        // "fish" isn't in syntect; grammar="fish" doesn't resolve either.
2144        languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2145        registry.apply_language_config(&languages);
2146
2147        let entry = registry
2148            .find_by_name("fish")
2149            .expect("fish should be in the catalog after apply_language_config");
2150        assert!(entry.engines.syntect.is_none());
2151        assert!(entry.engines.tree_sitter.is_none());
2152        assert_eq!(entry.language_id, "fish");
2153        assert!(entry.extensions.iter().any(|e| e == "fish"));
2154    }
2155
2156    /// Config-declared extensions must override the built-in mapping. If the
2157    /// user says `[languages.typescript-overlay] extensions = ["js"] grammar
2158    /// = "TypeScript"`, then `foo.js` must resolve to TypeScript, not
2159    /// JavaScript.
2160    #[test]
2161    fn test_config_extension_overrides_builtin() {
2162        let mut registry = GrammarRegistry::default();
2163        // Sanity: default mapping is JavaScript.
2164        assert_eq!(
2165            registry.find_by_extension("js").unwrap().display_name,
2166            "JavaScript"
2167        );
2168
2169        let mut languages = std::collections::HashMap::new();
2170        languages.insert(
2171            "ts-overlay".to_string(),
2172            lang_cfg("TypeScript", &["js"], &[]),
2173        );
2174        registry.apply_language_config(&languages);
2175
2176        assert_eq!(
2177            registry.find_by_extension("js").unwrap().display_name,
2178            "TypeScript",
2179            "user-config extension must win over built-in"
2180        );
2181    }
2182
2183    /// Bare filenames listed by syntect grammars (e.g. "Gemfile", "Makefile",
2184    /// "Rakefile") must resolve through `find_by_path`. Syntect stores these
2185    /// in each grammar's `file_extensions` field alongside real extensions
2186    /// like "rb"; its own `find_syntax_for_file` treats them as either. The
2187    /// catalog has to do the same or `HighlightEngine::for_file` breaks for
2188    /// every extensionless config file.
2189    #[test]
2190    fn test_bare_filename_resolves_via_find_by_path() {
2191        let registry = GrammarRegistry::default();
2192        for (filename, expected_substr) in [
2193            ("Gemfile", "ruby"),
2194            ("Rakefile", "ruby"),
2195            ("Vagrantfile", "ruby"),
2196            ("Makefile", "makefile"),
2197            ("GNUmakefile", "makefile"),
2198        ] {
2199            let entry = registry
2200                .find_by_path(Path::new(filename), None)
2201                .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2202            assert!(
2203                entry.display_name.to_lowercase().contains(expected_substr),
2204                "{} should resolve to {} grammar, got {}",
2205                filename,
2206                expected_substr,
2207                entry.display_name
2208            );
2209        }
2210    }
2211
2212    /// Languages that have both syntect and tree-sitter (e.g. JavaScript) must
2213    /// expose the union of both engines' extensions. Tree-sitter-javascript
2214    /// knows `.jsx`; syntect's JavaScript grammar does not. Both should route
2215    /// through the JavaScript catalog entry.
2216    #[test]
2217    fn test_jsx_resolves_to_javascript() {
2218        let registry = GrammarRegistry::default();
2219        let entry = registry
2220            .find_by_path(Path::new("foo.jsx"), None)
2221            .expect("foo.jsx must resolve");
2222        assert_eq!(entry.display_name, "JavaScript");
2223    }
2224
2225    /// `rebuild_catalog` must replay the last-applied language config so it
2226    /// can never silently wipe user `[languages]` rules. This is the invariant
2227    /// that keeps `register_alias`, `populate_built_in_aliases`, and any
2228    /// future rebuild callsite safe-by-construction.
2229    #[test]
2230    fn test_rebuild_catalog_replays_language_config() {
2231        let mut registry = GrammarRegistry::default();
2232        let mut languages = std::collections::HashMap::new();
2233        languages.insert(
2234            "myshell".to_string(),
2235            lang_cfg("bash", &["myext"], &["*.myglob"]),
2236        );
2237        registry.apply_language_config(&languages);
2238        assert!(registry.find_by_extension("myext").is_some());
2239        assert!(registry
2240            .find_by_path(Path::new("foo.myglob"), None)
2241            .is_some());
2242
2243        // Force a rebuild — the catalog gets wiped and re-populated from
2244        // syntect / tree-sitter, but user config must come back on top.
2245        registry.rebuild_catalog();
2246        assert!(
2247            registry.find_by_extension("myext").is_some(),
2248            "rebuild_catalog must replay applied user config"
2249        );
2250        assert!(
2251            registry
2252                .find_by_path(Path::new("foo.myglob"), None)
2253                .is_some(),
2254            "rebuild_catalog must replay user globs"
2255        );
2256    }
2257
2258    /// `apply_language_config` must be idempotent: calling it twice with the
2259    /// same config yields the same catalog state.
2260    #[test]
2261    fn test_apply_language_config_idempotent() {
2262        let mut registry = GrammarRegistry::default();
2263        let mut languages = std::collections::HashMap::new();
2264        languages.insert(
2265            "shell-cfg".to_string(),
2266            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2267        );
2268
2269        registry.apply_language_config(&languages);
2270        let first_extensions = registry
2271            .find_by_name("bash")
2272            .unwrap()
2273            .extensions
2274            .iter()
2275            .filter(|e| e == &"myconf")
2276            .count();
2277        let first_globs = registry
2278            .find_by_name("bash")
2279            .unwrap()
2280            .filename_globs
2281            .iter()
2282            .filter(|g| g == &"*.myconf")
2283            .count();
2284        assert_eq!(first_extensions, 1);
2285        assert_eq!(first_globs, 1);
2286
2287        // Second call must not duplicate anything.
2288        registry.apply_language_config(&languages);
2289        let second_extensions = registry
2290            .find_by_name("bash")
2291            .unwrap()
2292            .extensions
2293            .iter()
2294            .filter(|e| e == &"myconf")
2295            .count();
2296        let second_globs = registry
2297            .find_by_name("bash")
2298            .unwrap()
2299            .filename_globs
2300            .iter()
2301            .filter(|g| g == &"*.myconf")
2302            .count();
2303        assert_eq!(second_extensions, 1, "extensions must not duplicate");
2304        assert_eq!(second_globs, 1, "globs must not duplicate");
2305    }
2306
2307    /// Julia: a single-quote after an identifier is the adjoint
2308    /// (conjugate-transpose) postfix operator, not the start of a string. The
2309    /// old grammar pushed a string context on every `'`, so `A'` swallowed
2310    /// the rest of the file until the next quote — wrecking highlighting for
2311    /// any subsequent keyword. Issue #1852.
2312    #[test]
2313    fn test_julia_adjoint_does_not_start_string() {
2314        use syntect::parsing::{ParseState, ScopeStack};
2315
2316        let registry = GrammarRegistry::default();
2317        let syntax_set = registry.syntax_set();
2318        let syntax = registry
2319            .find_syntax_by_name("Julia")
2320            .expect("Julia grammar must be loaded");
2321        let mut state = ParseState::new(syntax);
2322        let mut scopes = ScopeStack::new();
2323
2324        // Adjoint operator followed by code on later lines.
2325        let lines = ["x = A'\n", "function foo()\n", "end\n"];
2326        let mut keyword_line_in_string = false;
2327        let mut found_function_keyword = false;
2328
2329        for line in &lines {
2330            let ops = state.parse_line(line, syntax_set).unwrap();
2331            // Walk byte-by-byte, applying ops as we pass their offset.
2332            let mut op_iter = ops.iter().peekable();
2333            for (byte_idx, _) in line.char_indices() {
2334                while let Some((offset, op)) = op_iter.peek() {
2335                    if *offset <= byte_idx {
2336                        scopes.apply(op).unwrap();
2337                        op_iter.next();
2338                    } else {
2339                        break;
2340                    }
2341                }
2342                let in_string = scopes
2343                    .as_slice()
2344                    .iter()
2345                    .any(|s| s.build_string().starts_with("string."));
2346                let is_function_kw = line[byte_idx..].starts_with("function");
2347                if is_function_kw && in_string {
2348                    keyword_line_in_string = true;
2349                }
2350                if is_function_kw && !in_string {
2351                    found_function_keyword = true;
2352                }
2353            }
2354            // Drain remaining ops at end of line.
2355            for (_, op) in op_iter {
2356                scopes.apply(op).unwrap();
2357            }
2358        }
2359
2360        assert!(
2361            !keyword_line_in_string,
2362            "the `function` keyword after an adjoint operator must not be inside a string scope"
2363        );
2364        assert!(
2365            found_function_keyword,
2366            "test harness must have reached the `function` keyword"
2367        );
2368    }
2369
2370    /// Julia: `'a'` is a valid character literal. The grammar must still
2371    /// scope it as a constant/character so themes can color it. Issue #1852.
2372    #[test]
2373    fn test_julia_char_literal_is_recognized() {
2374        use syntect::parsing::{ParseState, ScopeStack};
2375
2376        let registry = GrammarRegistry::default();
2377        let syntax_set = registry.syntax_set();
2378        let syntax = registry
2379            .find_syntax_by_name("Julia")
2380            .expect("Julia grammar must be loaded");
2381        let mut state = ParseState::new(syntax);
2382        let mut scopes = ScopeStack::new();
2383
2384        let line = "x = 'a'\n";
2385        let ops = state.parse_line(line, syntax_set).unwrap();
2386        let mut saw_constant_or_string_at_quote = false;
2387        let mut op_iter = ops.iter().peekable();
2388        for (byte_idx, _) in line.char_indices() {
2389            while let Some((offset, op)) = op_iter.peek() {
2390                if *offset <= byte_idx {
2391                    scopes.apply(op).unwrap();
2392                    op_iter.next();
2393                } else {
2394                    break;
2395                }
2396            }
2397            if byte_idx == 5 {
2398                // position of 'a' (the char)
2399                let scoped = scopes.as_slice().iter().any(|s| {
2400                    let str = s.build_string();
2401                    str.starts_with("constant.") || str.starts_with("string.")
2402                });
2403                if scoped {
2404                    saw_constant_or_string_at_quote = true;
2405                }
2406            }
2407        }
2408        assert!(
2409            saw_constant_or_string_at_quote,
2410            "char literal 'a' must receive a constant/string scope"
2411        );
2412    }
2413
2414    /// `tree_sitter_for_syntect_name` handles the alias table + strict
2415    /// display-name match. The alias table catches syntect's verbose names;
2416    /// the strict match handles the common case.
2417    #[test]
2418    fn test_tree_sitter_bridge() {
2419        assert_eq!(
2420            tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2421            Some(fresh_languages::Language::Bash)
2422        );
2423        assert_eq!(
2424            tree_sitter_for_syntect_name("Rust"),
2425            Some(fresh_languages::Language::Rust)
2426        );
2427        // Must NOT fuzzy-match Nushell to Bash.
2428        assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2429        // Must NOT match arbitrary strings.
2430        assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2431    }
2432}