Skip to main content

fresh/primitives/grammar/
types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar to load: which language it is for, where its
/// grammar file lives, and which file extensions it should claim.
///
/// A named struct is used instead of an anonymous tuple so the plugin layer,
/// the loader, and the registry can hand grammar information to each other
/// with self-describing fields.
#[derive(Debug, Clone)]
pub struct GrammarSpec {
    /// Identifier of the language, e.g. `"elixir"`.
    pub language: String,
    /// Location of the `.sublime-syntax` grammar file.
    pub path: PathBuf,
    /// Extensions that should resolve to this grammar, e.g. `["ex", "exs"]`.
    pub extensions: Vec<String>,
}
30
31/// Where a grammar was loaded from.
32#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
33#[serde(tag = "type")]
34pub enum GrammarSource {
35    /// Built-in to Fresh (pre-compiled syntect defaults + embedded grammars)
36    #[serde(rename = "built-in")]
37    BuiltIn,
38    /// Installed from a user grammar directory (~/.config/fresh/grammars/)
39    #[serde(rename = "user")]
40    User { path: PathBuf },
41    /// From a language pack (~/.config/fresh/languages/packages/)
42    #[serde(rename = "language-pack")]
43    LanguagePack { name: String, path: PathBuf },
44    /// From a bundle package (~/.config/fresh/bundles/packages/)
45    #[serde(rename = "bundle")]
46    Bundle { name: String, path: PathBuf },
47    /// Registered by a plugin at runtime
48    #[serde(rename = "plugin")]
49    Plugin { plugin: String, path: PathBuf },
50}
51
52impl std::fmt::Display for GrammarSource {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            GrammarSource::BuiltIn => write!(f, "built-in"),
56            GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57            GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58            GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59            GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60        }
61    }
62}
63
64/// Information about an available grammar, including its provenance.
65#[derive(Clone, Debug, Serialize, Deserialize)]
66pub struct GrammarInfo {
67    /// The grammar name as used in config files (case-insensitive matching)
68    pub name: String,
69    /// Where this grammar was loaded from
70    pub source: GrammarSource,
71    /// File extensions associated with this grammar
72    pub file_extensions: Vec<String>,
73    /// Optional short name alias (e.g., "bash" for "Bourne Again Shell (bash)")
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub short_name: Option<String>,
76}
77
78/// Bridge between syntect display names and `fresh_languages::Language`.
79///
80/// Most syntect grammars map one-to-one: "Rust" → `Language::Rust`. A few
81/// have verbose display names that don't match the tree-sitter enum's
82/// `display_name()`, and `Language::from_name` has fuzzy "contains shell"
83/// fallbacks that would wrongly tag Nushell as tree-sitter Bash. This is
84/// the one place we spell the exceptions out explicitly.
85const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
86    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88/// Resolve a syntect syntax display name to a tree-sitter language, using
89/// strict equality against the alias table and `Language::display_name()`.
90fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91    for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92        if *syntect_name == display_name {
93            return Some(*lang);
94        }
95    }
96    fresh_languages::Language::all()
97        .iter()
98        .find(|l| l.display_name() == display_name)
99        .copied()
100}
101
102/// Which highlighters can serve a given `GrammarEntry`.
103///
104/// A catalog entry may come from syntect (a TextMate grammar indexed into
105/// `SyntaxSet`), tree-sitter (a `fresh_languages::Language`), or both.
106#[derive(Clone, Debug, Default)]
107pub struct GrammarEngines {
108    /// Index into `GrammarRegistry::syntax_set().syntaxes()`, if a syntect
109    /// grammar is available.
110    pub syntect: Option<usize>,
111    /// Tree-sitter language, if one is registered for this grammar.
112    pub tree_sitter: Option<fresh_languages::Language>,
113}
114
115/// A single entry in the unified grammar catalog.
116///
117/// Each entry represents one logical language (e.g. "Rust", "TypeScript") and
118/// records which highlighting engines can serve it, plus the names/extensions
119/// used to look it up. The catalog is the single source of truth for grammar
120/// lookups — `find_by_name`, `find_by_path`, `find_by_extension` all return
121/// entries from here, and both `HighlightEngine::from_entry` and
122/// `DetectedLanguage::from_entry` consume them.
123#[derive(Clone, Debug)]
124pub struct GrammarEntry {
125    /// Human-readable display name (e.g. "TypeScript", "Bourne Again Shell (bash)").
126    pub display_name: String,
127    /// Canonical language ID used in config and LSP (e.g. "typescript", "csharp").
128    pub language_id: String,
129    /// Short alias, if one exists (e.g. "ts" for TypeScript).
130    pub short_name: Option<String>,
131    /// File extensions (without leading dot).
132    pub extensions: Vec<String>,
133    /// Exact filenames that map to this grammar (e.g. "Dockerfile").
134    pub filenames: Vec<String>,
135    /// Filename globs from user config (e.g. "*.conf", "/etc/**/rc.*").
136    pub filename_globs: Vec<String>,
137    /// Where this grammar was loaded from.
138    pub source: GrammarSource,
139    /// Highlighters that can serve this entry.
140    pub engines: GrammarEngines,
141}
142
/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded GDScript grammar
/// Based on https://github.com/beefsack/GDScript-sublime (MIT License)
pub const GDSCRIPT_GRAMMAR: &str = include_str!("../../grammars/gdscript.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar (syntect doesn't include one)
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Embedded Dockerfile grammar
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar (also handles .env, .cfg, .editorconfig, etc.)
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL/Terraform grammar
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V language grammar
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Starlark/Bazel grammar
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go Module grammar
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar (Hyprland config)
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar
/// From: https://github.com/SALZKARTOFFEEEL/ahk-sublime-syntax (MIT License)
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
/// Embedded Racket grammar (syntect doesn't include one)
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
/// Embedded Verilog grammar (HDL)
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
/// Embedded SystemVerilog grammar (HDL)
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
/// Embedded VHDL grammar (HDL)
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");

/// Embedded C3 grammar
pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");
249
250/// Registry of all available TextMate grammars.
251///
252/// This struct holds the compiled syntax set and provides lookup methods.
253/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
254impl std::fmt::Debug for GrammarRegistry {
255    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256        f.debug_struct("GrammarRegistry")
257            .field("syntax_count", &self.syntax_set.syntaxes().len())
258            .finish()
259    }
260}
261
262pub struct GrammarRegistry {
263    /// Combined syntax set (built-in + embedded + user grammars)
264    syntax_set: Arc<SyntaxSet>,
265    /// Extension -> scope name mapping for user grammars (takes priority)
266    user_extensions: HashMap<String, String>,
267    /// Filename -> scope name mapping for dotfiles and special files
268    filename_scopes: HashMap<String, String>,
269    /// Paths to dynamically loaded grammar files (for reloading when adding more)
270    loaded_grammar_paths: Vec<GrammarSpec>,
271    /// Provenance info for each grammar (keyed by grammar name)
272    grammar_sources: HashMap<String, GrammarInfo>,
273    /// Short name aliases: lowercase short_name -> full syntect grammar name.
274    /// Provides a deterministic, one-to-one mapping so users can write
275    /// `grammar = "bash"` instead of `grammar = "Bourne Again Shell (bash)"`.
276    aliases: HashMap<String, String>,
277    /// Unified catalog of every known grammar. Rebuilt whenever the syntax set
278    /// or alias table changes. Lookups (`find_by_name`, `find_by_path`, ...)
279    /// all resolve against this.
280    catalog: Vec<GrammarEntry>,
281    /// Index from lowercased lookup keys (display name, language_id, short_name)
282    /// to catalog index.
283    catalog_by_name: HashMap<String, usize>,
284    /// Index from file extension (without dot) to catalog index.
285    catalog_by_extension: HashMap<String, usize>,
286    /// Index from filename to catalog index.
287    catalog_by_filename: HashMap<String, usize>,
288    /// The most recent language config handed to `apply_language_config`.
289    /// Retained so `rebuild_catalog` can replay it — otherwise a rebuild
290    /// (triggered by e.g. `populate_built_in_aliases`) silently wipes user
291    /// `[languages]` config that was merged on top.
292    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
293    /// Monotonic generation, bumped on every catalog mutation. Lets
294    /// observers (plugin state snapshot) detect changes with one integer
295    /// compare instead of recounting entries.
296    catalog_gen: u64,
297}
298
299impl GrammarRegistry {
300    /// Create a new GrammarRegistry from pre-built components.
301    ///
302    /// This is typically called by `GrammarLoader` implementations after
303    /// loading grammars from various sources.
304    pub(crate) fn new(
305        syntax_set: SyntaxSet,
306        user_extensions: HashMap<String, String>,
307        filename_scopes: HashMap<String, String>,
308    ) -> Self {
309        Self::new_with_loaded_paths(
310            syntax_set,
311            user_extensions,
312            filename_scopes,
313            Vec::new(),
314            HashMap::new(),
315        )
316    }
317
318    /// Create a GrammarRegistry with pre-loaded grammar path tracking.
319    ///
320    /// Used by the loader when plugin grammars were included in the initial build,
321    /// so that `loaded_grammar_paths()` reflects what was actually loaded.
322    pub(crate) fn new_with_loaded_paths(
323        syntax_set: SyntaxSet,
324        user_extensions: HashMap<String, String>,
325        filename_scopes: HashMap<String, String>,
326        loaded_grammar_paths: Vec<GrammarSpec>,
327        grammar_sources: HashMap<String, GrammarInfo>,
328    ) -> Self {
329        let mut reg = Self {
330            syntax_set: Arc::new(syntax_set),
331            user_extensions,
332            filename_scopes,
333            loaded_grammar_paths,
334            grammar_sources,
335            aliases: HashMap::new(),
336            catalog: Vec::new(),
337            catalog_by_name: HashMap::new(),
338            catalog_by_extension: HashMap::new(),
339            catalog_by_filename: HashMap::new(),
340            applied_language_config: HashMap::new(),
341            catalog_gen: 0,
342        };
343        reg.rebuild_catalog();
344        reg
345    }
346
347    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
348    pub fn empty() -> Arc<Self> {
349        let mut builder = SyntaxSetBuilder::new();
350        builder.add_plain_text_syntax();
351        let mut reg = Self {
352            syntax_set: Arc::new(builder.build()),
353            user_extensions: HashMap::new(),
354            filename_scopes: HashMap::new(),
355            loaded_grammar_paths: Vec::new(),
356            grammar_sources: HashMap::new(),
357            aliases: HashMap::new(),
358            catalog: Vec::new(),
359            catalog_by_name: HashMap::new(),
360            catalog_by_extension: HashMap::new(),
361            catalog_by_filename: HashMap::new(),
362            applied_language_config: HashMap::new(),
363            catalog_gen: 0,
364        };
365        reg.rebuild_catalog();
366        Arc::new(reg)
367    }
368
369    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
370    ///
371    /// This provides instant syntax highlighting for ~50 common languages
372    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
373    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
374    /// then swap in a full registry built on a background thread.
375    pub fn defaults_only() -> Arc<Self> {
376        // Load pre-compiled syntax set (defaults + embedded grammars) from
377        // build-time packdump. This avoids the expensive into_builder() + build()
378        // cycle at runtime (~12s → ~300ms).
379        tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
380        let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
381            concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
382        ))
383        .expect("Failed to load pre-compiled syntax packdump");
384        tracing::info!(
385            "defaults_only: loaded ({} syntaxes)",
386            syntax_set.syntaxes().len()
387        );
388        let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
389        let filename_scopes = Self::build_filename_scopes();
390        let extra_extensions = Self::build_extra_extensions();
391        let mut registry = Self {
392            syntax_set: Arc::new(syntax_set),
393            user_extensions: extra_extensions,
394            filename_scopes,
395            loaded_grammar_paths: Vec::new(),
396            grammar_sources,
397            aliases: HashMap::new(),
398            catalog: Vec::new(),
399            catalog_by_name: HashMap::new(),
400            catalog_by_extension: HashMap::new(),
401            catalog_by_filename: HashMap::new(),
402            applied_language_config: HashMap::new(),
403            catalog_gen: 0,
404        };
405        registry.populate_built_in_aliases();
406        registry.rebuild_catalog();
407        Arc::new(registry)
408    }
409
410    /// Build extra extension -> scope mappings for extensions not covered by syntect defaults.
411    ///
412    /// These map common file extensions to existing syntect grammar scopes,
413    /// filling gaps where syntect's built-in extension lists are incomplete.
414    pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
415        let mut map = HashMap::new();
416
417        // JavaScript variants not in syntect defaults (["js", "htc"])
418        let js_scope = "source.js".to_string();
419        map.insert("cjs".to_string(), js_scope.clone());
420        map.insert("mjs".to_string(), js_scope);
421
422        // Dockerfile variants (e.g. Dockerfile.dev -> .dev extension)
423        // These won't match by extension, handled by filename_scopes and first_line_match
424
425        map
426    }
427
428    /// Build the default filename -> scope mappings for dotfiles and special files.
429    pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
430        let mut map = HashMap::new();
431
432        // Shell configuration files -> Bash/Shell script scope
433        let shell_scope = "source.shell.bash".to_string();
434        for filename in [
435            ".zshrc",
436            ".zprofile",
437            ".zshenv",
438            ".zlogin",
439            ".zlogout",
440            ".bash_aliases",
441            // .bashrc and .bash_profile are already recognized by syntect
442            // Common shell script files without extensions
443            "PKGBUILD",
444            "APKBUILD",
445        ] {
446            map.insert(filename.to_string(), shell_scope.clone());
447        }
448
449        // Git rebase todo files
450        let git_rebase_scope = "source.git-rebase-todo".to_string();
451        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
452
453        // Git commit message files
454        let git_commit_scope = "source.git-commit".to_string();
455        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
456            map.insert(filename.to_string(), git_commit_scope.clone());
457        }
458
459        // Gitignore and similar files
460        let gitignore_scope = "source.gitignore".to_string();
461        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
462            map.insert(filename.to_string(), gitignore_scope.clone());
463        }
464
465        // Git config files
466        let gitconfig_scope = "source.gitconfig".to_string();
467        for filename in [".gitconfig", ".gitmodules"] {
468            map.insert(filename.to_string(), gitconfig_scope.clone());
469        }
470
471        // Git attributes files
472        let gitattributes_scope = "source.gitattributes".to_string();
473        map.insert(".gitattributes".to_string(), gitattributes_scope);
474
475        // Jenkinsfile -> Groovy
476        let groovy_scope = "source.groovy".to_string();
477        map.insert("Jenkinsfile".to_string(), groovy_scope);
478
479        // Vagrantfile -> Ruby (syntect already handles this, but be explicit)
480        // Brewfile -> Ruby
481        let ruby_scope = "source.ruby".to_string();
482        map.insert("Brewfile".to_string(), ruby_scope);
483
484        // Dockerfile and variants (exact names; Dockerfile.* handled via prefix check)
485        let dockerfile_scope = "source.dockerfile".to_string();
486        map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
487        map.insert("Containerfile".to_string(), dockerfile_scope.clone());
488        // Common Dockerfile variants
489        map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
490        map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
491        map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
492        map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
493
494        // CMake
495        let cmake_scope = "source.cmake".to_string();
496        map.insert("CMakeLists.txt".to_string(), cmake_scope);
497
498        // Starlark/Bazel
499        let starlark_scope = "source.starlark".to_string();
500        map.insert("BUILD".to_string(), starlark_scope.clone());
501        map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
502        map.insert("WORKSPACE".to_string(), starlark_scope.clone());
503        map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
504        map.insert("Tiltfile".to_string(), starlark_scope);
505
506        // Justfile (various casings)
507        let justfile_scope = "source.justfile".to_string();
508        map.insert("justfile".to_string(), justfile_scope.clone());
509        map.insert("Justfile".to_string(), justfile_scope.clone());
510        map.insert(".justfile".to_string(), justfile_scope);
511
512        // EditorConfig -> INI
513        let ini_scope = "source.ini".to_string();
514        map.insert(".editorconfig".to_string(), ini_scope);
515
516        // Earthfile
517        let earthfile_scope = "source.earthfile".to_string();
518        map.insert("Earthfile".to_string(), earthfile_scope);
519
520        // Hyprlang (Hyprland config files)
521        let hyprlang_scope = "source.hyprlang".to_string();
522        map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
523        map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
524        map.insert("hyprlock.conf".to_string(), hyprlang_scope);
525
526        // go.mod / go.sum
527        let gomod_scope = "source.gomod".to_string();
528        map.insert("go.mod".to_string(), gomod_scope.clone());
529        map.insert("go.sum".to_string(), gomod_scope);
530
531        map
532    }
533
534    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
535    pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
536        // TOML grammar
537        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
538            Ok(syntax) => {
539                builder.add(syntax);
540                tracing::debug!("Loaded embedded TOML grammar");
541            }
542            Err(e) => {
543                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
544            }
545        }
546
547        // Odin grammar
548        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
549            Ok(syntax) => {
550                builder.add(syntax);
551                tracing::debug!("Loaded embedded Odin grammar");
552            }
553            Err(e) => {
554                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
555            }
556        }
557
558        // Zig grammar
559        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
560            Ok(syntax) => {
561                builder.add(syntax);
562                tracing::debug!("Loaded embedded Zig grammar");
563            }
564            Err(e) => {
565                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
566            }
567        }
568
569        // GDScript grammar
570        match SyntaxDefinition::load_from_str(GDSCRIPT_GRAMMAR, true, Some("GDScript")) {
571            Ok(syntax) => {
572                builder.add(syntax);
573                tracing::debug!("Loaded embedded GDScript grammar");
574            }
575            Err(e) => {
576                tracing::warn!("Failed to load embedded GDScript grammar: {}", e);
577            }
578        }
579
580        // Git Rebase Todo grammar
581        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
582            Ok(syntax) => {
583                builder.add(syntax);
584                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
585            }
586            Err(e) => {
587                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
588            }
589        }
590
591        // Git Commit Message grammar
592        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
593        {
594            Ok(syntax) => {
595                builder.add(syntax);
596                tracing::debug!("Loaded embedded Git Commit Message grammar");
597            }
598            Err(e) => {
599                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
600            }
601        }
602
603        // Gitignore grammar
604        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
605            Ok(syntax) => {
606                builder.add(syntax);
607                tracing::debug!("Loaded embedded Gitignore grammar");
608            }
609            Err(e) => {
610                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
611            }
612        }
613
614        // Git Config grammar
615        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
616            Ok(syntax) => {
617                builder.add(syntax);
618                tracing::debug!("Loaded embedded Git Config grammar");
619            }
620            Err(e) => {
621                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
622            }
623        }
624
625        // Git Attributes grammar
626        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
627            Ok(syntax) => {
628                builder.add(syntax);
629                tracing::debug!("Loaded embedded Git Attributes grammar");
630            }
631            Err(e) => {
632                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
633            }
634        }
635
636        // Typst grammar
637        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
638            Ok(syntax) => {
639                builder.add(syntax);
640                tracing::debug!("Loaded embedded Typst grammar");
641            }
642            Err(e) => {
643                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
644            }
645        }
646
647        // Additional embedded grammars for languages not in syntect defaults
648        let additional_grammars: &[(&str, &str)] = &[
649            (DOCKERFILE_GRAMMAR, "Dockerfile"),
650            (INI_GRAMMAR, "INI"),
651            (CMAKE_GRAMMAR, "CMake"),
652            (SCSS_GRAMMAR, "SCSS"),
653            (LESS_GRAMMAR, "LESS"),
654            (POWERSHELL_GRAMMAR, "PowerShell"),
655            (KOTLIN_GRAMMAR, "Kotlin"),
656            (SWIFT_GRAMMAR, "Swift"),
657            (DART_GRAMMAR, "Dart"),
658            (ELIXIR_GRAMMAR, "Elixir"),
659            (FSHARP_GRAMMAR, "FSharp"),
660            (NIX_GRAMMAR, "Nix"),
661            (HCL_GRAMMAR, "HCL"),
662            (PROTOBUF_GRAMMAR, "Protocol Buffers"),
663            (GRAPHQL_GRAMMAR, "GraphQL"),
664            (JULIA_GRAMMAR, "Julia"),
665            (NIM_GRAMMAR, "Nim"),
666            (GLEAM_GRAMMAR, "Gleam"),
667            (VLANG_GRAMMAR, "V"),
668            (SOLIDITY_GRAMMAR, "Solidity"),
669            (KDL_GRAMMAR, "KDL"),
670            (NUSHELL_GRAMMAR, "Nushell"),
671            (STARLARK_GRAMMAR, "Starlark"),
672            (JUSTFILE_GRAMMAR, "Justfile"),
673            (EARTHFILE_GRAMMAR, "Earthfile"),
674            (GOMOD_GRAMMAR, "Go Module"),
675            (VUE_GRAMMAR, "Vue"),
676            (SVELTE_GRAMMAR, "Svelte"),
677            (ASTRO_GRAMMAR, "Astro"),
678            (HYPRLANG_GRAMMAR, "Hyprlang"),
679            (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
680            (RACKET_GRAMMAR, "Racket"),
681            (VERILOG_GRAMMAR, "Verilog"),
682            (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
683            (VHDL_GRAMMAR, "VHDL"),
684            (C3_GRAMMAR, "C3"),
685        ];
686
687        for (grammar_str, name) in additional_grammars {
688            match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
689                Ok(syntax) => {
690                    builder.add(syntax);
691                    tracing::debug!("Loaded embedded {} grammar", name);
692                }
693                Err(e) => {
694                    tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
695                }
696            }
697        }
698    }
699
700    /// Find syntax for a file by path/extension/filename.
701    ///
702    /// Purely metadata-based — does not read the file. For first-line
703    /// (shebang) fallback, use [`find_by_path`] with a `first_line` argument
704    /// and resolve the returned entry's syntect index.
705    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
706        let entry = self.find_by_path(path, None)?;
707        entry
708            .engines
709            .syntect
710            .map(|i| &self.syntax_set.syntaxes()[i])
711    }
712
713    /// Find syntax by name, with alias resolution.
714    ///
715    /// Thin wrapper around `find_by_name` that returns the associated syntect
716    /// `SyntaxReference`. Tree-sitter-only entries return `None`.
717    ///
718    /// Falls back to a direct syntect lookup for "Plain Text", which the
719    /// catalog deliberately omits but syntect still exposes.
720    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
721        if let Some(entry) = self.find_by_name(name) {
722            if let Some(idx) = entry.engines.syntect {
723                return Some(&self.syntax_set.syntaxes()[idx]);
724            }
725        }
726        // Plain Text is excluded from the catalog (it's not a "grammar" a user
727        // would ever pick), but syntect still stores it and a handful of
728        // callers still ask for it by name.
729        self.syntax_set.find_syntax_by_name(name)
730    }
731
732    // === Alias management ===
733
734    /// Hardcoded short-name aliases for built-in and embedded grammars.
735    ///
736    /// Each entry maps a short name (lowercase) to the exact syntect grammar name.
737    /// Only grammars whose full name differs significantly from a natural short
738    /// form need an entry here. Grammars already short (e.g., "Rust", "Go") are
739    /// reachable via case-insensitive matching and don't need aliases.
740    fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
741        vec![
742            // Syntect built-in grammars with verbose names
743            ("bash", "Bourne Again Shell (bash)"),
744            ("shell", "Bourne Again Shell (bash)"),
745            ("sh", "Bourne Again Shell (bash)"),
746            ("c++", "C++"),
747            ("cpp", "C++"),
748            ("csharp", "C#"),
749            ("objc", "Objective-C"),
750            ("objcpp", "Objective-C++"),
751            ("regex", "Regular Expressions (Python)"),
752            ("regexp", "Regular Expressions (Python)"),
753            // Embedded grammars with multi-word or non-obvious names
754            ("proto", "Protocol Buffers"),
755            ("protobuf", "Protocol Buffers"),
756            ("gomod", "Go Module"),
757            ("git-rebase", "Git Rebase Todo"),
758            ("git-commit", "Git Commit Message"),
759            ("git-config", "Git Config"),
760            ("git-attributes", "Git Attributes"),
761            ("gitignore", "Gitignore"),
762            ("fsharp", "FSharp"),
763            ("f#", "FSharp"),
764            ("terraform", "HCL"),
765            ("tf", "HCL"),
766            ("ts", "TypeScript"),
767            ("js", "JavaScript"),
768            ("py", "Python"),
769            ("rb", "Ruby"),
770            ("rs", "Rust"),
771            ("md", "Markdown"),
772            ("yml", "YAML"),
773            ("dockerfile", "Dockerfile"),
774        ]
775    }
776
777    /// Populate aliases from the built-in table.
778    ///
779    /// Validates that:
780    /// - Each alias target (full name) exists in the syntax set
781    /// - No alias collides (case-insensitive) with an existing grammar full name
782    /// - No duplicate aliases exist
783    pub(crate) fn populate_built_in_aliases(&mut self) {
784        for (short, full) in Self::built_in_aliases() {
785            self.register_alias_inner(short, full, true);
786        }
787        self.rebuild_catalog();
788    }
789
790    /// Register a short-name alias for a grammar.
791    ///
792    /// Returns `true` if the alias was registered, `false` if rejected due to
793    /// collision or missing target. For built-in aliases, collisions panic
794    /// (they indicate a bug). For dynamic aliases, collisions log a warning.
795    ///
796    /// Splices the alias directly into the catalog rather than rebuilding, so
797    /// any user config previously merged via `apply_language_config` is
798    /// preserved. A full rebuild would wipe those entries.
799    pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
800        if !self.register_alias_inner(short_name, full_name, false) {
801            return false;
802        }
803        let short_lower = short_name.to_lowercase();
804        let full_lower = full_name.to_lowercase();
805        if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
806            self.catalog_by_name
807                .entry(short_lower.clone())
808                .or_insert(idx);
809            let entry = &mut self.catalog[idx];
810            let replace = match &entry.short_name {
811                None => true,
812                Some(existing) => short_name.len() < existing.len(),
813            };
814            if replace {
815                entry.short_name = Some(short_lower);
816            }
817        }
818        true
819    }
820
821    fn register_alias_inner(
822        &mut self,
823        short_name: &str,
824        full_name: &str,
825        is_built_in: bool,
826    ) -> bool {
827        let short_lower = short_name.to_lowercase();
828
829        // Validate: target grammar must exist in the syntax set
830        let target_exists = self
831            .syntax_set
832            .syntaxes()
833            .iter()
834            .any(|s| s.name.eq_ignore_ascii_case(full_name));
835        if !target_exists {
836            // Tree-sitter-only targets (e.g. TypeScript) are expected to be
837            // absent from the syntect set. `rebuild_catalog` attaches their
838            // short names via a separate pass over `built_in_aliases()`.
839            if tree_sitter_for_syntect_name(full_name).is_some() {
840                return false;
841            }
842            if is_built_in {
843                // Built-in alias targets should always exist; warn but don't panic
844                // (grammar might have been removed from syntect upstream)
845                tracing::warn!(
846                    "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
847                    short_name, full_name
848                );
849            } else {
850                tracing::warn!(
851                    "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
852                    short_name,
853                    full_name
854                );
855            }
856            return false;
857        }
858
859        // Validate: short name must not collide (case-insensitive) with any grammar full name
860        let collides_with_full_name = self
861            .syntax_set
862            .syntaxes()
863            .iter()
864            .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
865        if collides_with_full_name {
866            // This is actually fine — the short name matches a full name directly,
867            // so find_syntax_by_name's case-insensitive search will find it.
868            // No alias needed.
869            tracing::debug!(
870                "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
871                short_name
872            );
873            return false;
874        }
875
876        // Validate: no duplicate alias (case-insensitive)
877        if let Some(existing_target) = self.aliases.get(&short_lower) {
878            if existing_target.eq_ignore_ascii_case(full_name) {
879                // Same mapping, no-op
880                return true;
881            }
882            let msg = format!(
883                "Alias '{}' already maps to '{}', cannot remap to '{}'",
884                short_name, existing_target, full_name
885            );
886            if is_built_in {
887                panic!("[grammar-alias] Built-in alias collision: {}", msg);
888            } else {
889                tracing::warn!("[grammar-alias] {}", msg);
890                return false;
891            }
892        }
893
894        // Resolve the exact syntect name (preserving original case)
895        let exact_name = self
896            .syntax_set
897            .syntaxes()
898            .iter()
899            .find(|s| s.name.eq_ignore_ascii_case(full_name))
900            .map(|s| s.name.clone())
901            .unwrap();
902
903        self.aliases.insert(short_lower, exact_name);
904        true
905    }
906
907    // === Unified catalog ===
908
909    /// Rebuild the flat catalog of grammar entries.
910    ///
911    /// Called after the syntax set, aliases, or filename scopes change.
912    /// Produces one entry per logical language by merging:
913    /// 1. Every `SyntaxReference` in the syntax set (except "Plain Text")
914    /// 2. Every `fresh_languages::Language` not already covered by a syntect entry
915    /// 3. Alias short-names attached to their target entry
916    /// 4. Filename mappings from `filename_scopes` attached to their scope's entry
917    /// 5. Extra extensions from `user_extensions` attached to their scope's entry
918    ///
919    /// Automatically replays the last `apply_language_config` at the end, so
920    /// user `[languages]` config survives any rebuild.
921    pub(crate) fn rebuild_catalog(&mut self) {
922        // Reverse-map: full_name (lowercase) -> shortest alias.
923        //
924        // Seed from the built-in alias table as well as the live `aliases`
925        // HashMap: the live map only contains aliases whose target exists in
926        // the syntect set, so tree-sitter-only entries (TypeScript) would
927        // otherwise never get their short name ("ts").
928        let mut short_by_full: HashMap<String, String> = HashMap::new();
929        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
930            let key = full.to_lowercase();
931            let keep = match map.get(&key) {
932                None => true,
933                Some(existing) => short.len() < existing.len(),
934            };
935            if keep {
936                map.insert(key, short.to_string());
937            }
938        };
939        for (short, full) in Self::built_in_aliases() {
940            record(&mut short_by_full, short, full);
941        }
942        for (short, full) in &self.aliases {
943            record(&mut short_by_full, short, full);
944        }
945
946        let derive_language_id =
947            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
948                let ts = tree_sitter_for_syntect_name(display_name);
949                let id = ts
950                    .map(|l| l.id().to_string())
951                    .unwrap_or_else(|| display_name.to_lowercase());
952                (id, ts)
953            };
954
955        let mut catalog: Vec<GrammarEntry> = Vec::new();
956        let mut scope_to_index: HashMap<String, usize> = HashMap::new();
957
958        // Syntect-backed entries (skip Plain Text and JavaScript).
959        //
960        // Syntect's `file_extensions` is a hybrid list: real extensions like
961        // "rb" sit alongside bare filenames like "Gemfile", "Rakefile",
962        // "Makefile". Syntect's own `find_syntax_for_file` tries each entry
963        // against the whole filename AND against the path's extension, and
964        // the catalog has to preserve that semantics. We keep everything in
965        // `extensions` here and index each entry as *both* an extension and
966        // a filename at the bottom of this method.
967        //
968        // JavaScript is skipped here so the catalog falls through to the
969        // tree-sitter-only fallback below — the bundled syntect JS grammar
970        // mishandles class fields whose initialiser is an arrow function
971        // returning a template literal (issue #899: state leaks past the
972        // closing backtick and paints the rest of the file as a string).
973        // tree-sitter-javascript parses template literals from the AST and
974        // does not have this failure mode. `find_syntax_by_name("JavaScript")`
975        // still returns syntect's grammar via the catalog's fallback path,
976        // so markdown popup rendering and other code-string highlighters
977        // are unaffected.
978        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
979            if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
980                continue;
981            }
982            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
983            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
984            let source = self
985                .grammar_sources
986                .get(&syntax.name)
987                .map(|info| info.source.clone())
988                .unwrap_or(GrammarSource::BuiltIn);
989            let entry_index = catalog.len();
990            scope_to_index.insert(syntax.scope.to_string(), entry_index);
991
992            // Union syntect's file_extensions with tree-sitter's own
993            // extension list when the entry carries both engines.
994            // tree-sitter-javascript handles `.jsx`/`.mjs`/`.cjs` that
995            // syntect's JS grammar doesn't list, and the old code used to
996            // route those paths to tree-sitter via a separate lookup.
997            let mut extensions = syntax.file_extensions.clone();
998            if let Some(lang) = tree_sitter {
999                for ext in lang.extensions() {
1000                    let ext = ext.to_string();
1001                    if !extensions.iter().any(|e| e == &ext) {
1002                        extensions.push(ext);
1003                    }
1004                }
1005            }
1006
1007            catalog.push(GrammarEntry {
1008                display_name: syntax.name.clone(),
1009                language_id,
1010                short_name,
1011                extensions,
1012                filenames: Vec::new(),
1013                filename_globs: Vec::new(),
1014                source,
1015                engines: GrammarEngines {
1016                    syntect: Some(idx),
1017                    tree_sitter,
1018                },
1019            });
1020        }
1021
1022        // Attach filename_scopes to their entries.
1023        for (filename, scope) in &self.filename_scopes {
1024            if let Some(&idx) = scope_to_index.get(scope) {
1025                if !catalog[idx].filenames.iter().any(|f| f == filename) {
1026                    catalog[idx].filenames.push(filename.clone());
1027                }
1028            }
1029        }
1030
1031        // Attach user_extensions (extra → scope) to their entries.
1032        for (ext, scope) in &self.user_extensions {
1033            if let Some(&idx) = scope_to_index.get(scope) {
1034                if !catalog[idx].extensions.iter().any(|e| e == ext) {
1035                    catalog[idx].extensions.push(ext.clone());
1036                }
1037            }
1038        }
1039
1040        // Ensure every tree-sitter language has an entry. If a syntect entry
1041        // already maps to the same tree-sitter language, skip it; otherwise
1042        // add a tree-sitter-only entry so the catalog is complete (TypeScript
1043        // being the motivating example — syntect ships no grammar for it).
1044        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1045            std::collections::HashSet::new();
1046        for entry in &catalog {
1047            if let Some(lang) = entry.engines.tree_sitter {
1048                ts_covered.insert(lang);
1049            }
1050        }
1051        for lang in fresh_languages::Language::all() {
1052            if ts_covered.contains(lang) {
1053                continue;
1054            }
1055            let display_name = lang.display_name().to_string();
1056            let language_id = lang.id().to_string();
1057            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1058            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1059            catalog.push(GrammarEntry {
1060                display_name,
1061                language_id,
1062                short_name,
1063                extensions,
1064                filenames: Vec::new(),
1065                filename_globs: Vec::new(),
1066                source: GrammarSource::BuiltIn,
1067                engines: GrammarEngines {
1068                    syntect: None,
1069                    tree_sitter: Some(*lang),
1070                },
1071            });
1072        }
1073
1074        // Build name / extension / filename indices.
1075        //
1076        // Every entry in `extensions` gets indexed in BOTH `by_extension`
1077        // (lowercased) AND `by_filename` (exact case) — syntect's
1078        // `file_extensions` list holds both real extensions ("rb") and bare
1079        // filenames ("Gemfile", "Rakefile", "Makefile"). Indexing both ways
1080        // matches syntect's own `find_syntax_for_file` semantics.
1081        let mut by_name: HashMap<String, usize> = HashMap::new();
1082        let mut by_extension: HashMap<String, usize> = HashMap::new();
1083        let mut by_filename: HashMap<String, usize> = HashMap::new();
1084        for (idx, entry) in catalog.iter().enumerate() {
1085            by_name.insert(entry.display_name.to_lowercase(), idx);
1086            by_name.insert(entry.language_id.to_lowercase(), idx);
1087            if let Some(short) = &entry.short_name {
1088                by_name.insert(short.to_lowercase(), idx);
1089            }
1090            for ext in &entry.extensions {
1091                by_extension.entry(ext.to_lowercase()).or_insert(idx);
1092                by_filename.entry(ext.clone()).or_insert(idx);
1093            }
1094            for filename in &entry.filenames {
1095                by_filename.entry(filename.clone()).or_insert(idx);
1096            }
1097        }
1098
1099        self.catalog = catalog;
1100        self.catalog_by_name = by_name;
1101        self.catalog_by_extension = by_extension;
1102        self.catalog_by_filename = by_filename;
1103
1104        // Replay the most recent user config so a rebuild doesn't silently
1105        // wipe out user `[languages]` rules. `take` + restore avoids both a
1106        // clone and a borrow checker fight with `apply_language_config_inner`.
1107        if !self.applied_language_config.is_empty() {
1108            let cfg = std::mem::take(&mut self.applied_language_config);
1109            self.apply_language_config_inner(&cfg);
1110            self.applied_language_config = cfg;
1111        }
1112        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1113    }
1114
1115    /// Return the full catalog of grammar entries.
1116    pub fn catalog(&self) -> &[GrammarEntry] {
1117        &self.catalog
1118    }
1119
    /// Current catalog generation counter.
    ///
    /// The counter is advanced by one (with wrapping arithmetic) when the
    /// catalog is rebuilt or when user language config is applied. Compare
    /// it against a previously observed value to decide whether derived
    /// state needs to be recomputed; only equality is meaningful, not
    /// ordering.
    pub fn catalog_gen(&self) -> u64 {
        self.catalog_gen
    }
1126
1127    /// Look up a grammar entry by display name, language ID, or short alias
1128    /// (case-insensitive). All aliases — built-in and user-config-declared —
1129    /// are indexed directly in `catalog_by_name` during `rebuild_catalog` /
1130    /// `register_alias` / `apply_language_config`, so a single lookup covers
1131    /// every case.
1132    pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1133        self.catalog_by_name
1134            .get(&name.to_lowercase())
1135            .map(|&idx| &self.catalog[idx])
1136    }
1137
1138    /// Look up a grammar entry by file path, with optional first-line content
1139    /// for shebang / `first_line_match` detection.
1140    ///
1141    /// Resolution order:
1142    /// 1. Exact filename (config-declared filenames and filename_scopes live here)
1143    /// 2. Glob patterns from user config (e.g. "*.conf", "/etc/**/rc.*")
1144    /// 3. File extension
1145    /// 4. Shebang / first-line regex match on `first_line` if supplied
1146    ///
1147    /// Globs take priority over extension so a user rule like `*.conf → bash`
1148    /// wins over any built-in extension match on `.conf`. The first-line
1149    /// fallback (#4) is last so catalog matches stay authoritative — syntect
1150    /// might otherwise misclassify `.fish` as bash via its first-line
1151    /// regexes.
1152    ///
1153    /// The first-line fallback is pure: it runs syntect's
1154    /// `find_syntax_by_first_line` regex cache against the caller-supplied
1155    /// string. The registry never touches the filesystem — the caller (who
1156    /// already loaded the buffer via the `FileSystem` trait) must extract
1157    /// the first line and pass it in.
1158    pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1159        let filename = path.file_name().and_then(|n| n.to_str());
1160        let path_str = path.to_str().unwrap_or("");
1161
1162        if let Some(name) = filename {
1163            if let Some(&idx) = self.catalog_by_filename.get(name) {
1164                return Some(&self.catalog[idx]);
1165            }
1166        }
1167
1168        // Glob walk — filenames with globs are rare so linear scan is fine.
1169        if let Some(name) = filename {
1170            for entry in &self.catalog {
1171                for pattern in &entry.filename_globs {
1172                    let matched = if is_path_pattern(pattern) {
1173                        path_glob_matches(pattern, path_str)
1174                    } else {
1175                        filename_glob_matches(pattern, name)
1176                    };
1177                    if matched {
1178                        return Some(entry);
1179                    }
1180                }
1181            }
1182        }
1183
1184        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1185            if let Some(entry) = self.find_by_extension(ext) {
1186                return Some(entry);
1187            }
1188        }
1189
1190        // Last resort: shebang / first-line regex match against the
1191        // caller-supplied content. Map the matched syntect grammar back to a
1192        // catalog entry by name — every syntect syntax has a catalog entry,
1193        // so this round-trip preserves tree-sitter attachment.
1194        let line = first_line?;
1195        let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1196        self.find_by_name(&syntax.name)
1197    }
1198
1199    /// Look up a grammar entry by file extension (case-insensitive, without dot).
1200    pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1201        self.catalog_by_extension
1202            .get(&ext.to_lowercase())
1203            .map(|&idx| &self.catalog[idx])
1204    }
1205
1206    /// Merge user `[languages]` config into the catalog.
1207    ///
1208    /// For each config entry, resolves its grammar to an existing catalog entry
1209    /// (by grammar name or by language id). Extensions are added and override
1210    /// the ext→entry index so config wins over built-in mappings. Filenames are
1211    /// split into exact matches (indexed) and globs (walked at lookup time).
1212    ///
1213    /// If no existing entry matches, a new engine-less entry is created so the
1214    /// language still appears in the palette.
1215    ///
1216    /// Idempotent. The config is cached on the registry so `rebuild_catalog`
1217    /// can replay it — callers don't need to re-apply after a rebuild.
1218    pub fn apply_language_config(
1219        &mut self,
1220        languages: &HashMap<String, crate::config::LanguageConfig>,
1221    ) {
1222        self.applied_language_config = languages.clone();
1223        self.apply_language_config_inner(languages);
1224        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1225    }
1226
1227    /// Do the actual catalog splicing without touching
1228    /// `applied_language_config`. Called from `apply_language_config` (which
1229    /// records the input) and from `rebuild_catalog` (which replays the
1230    /// cached input after wiping the catalog).
1231    fn apply_language_config_inner(
1232        &mut self,
1233        languages: &HashMap<String, crate::config::LanguageConfig>,
1234    ) {
1235        for (lang_id, lang_cfg) in languages {
1236            let grammar_name = if lang_cfg.grammar.is_empty() {
1237                lang_id.as_str()
1238            } else {
1239                lang_cfg.grammar.as_str()
1240            };
1241
1242            // Resolve to an existing entry; fall back to creating one.
1243            let idx = self
1244                .catalog_by_name
1245                .get(&grammar_name.to_lowercase())
1246                .copied()
1247                .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1248                .unwrap_or_else(|| {
1249                    let idx = self.catalog.len();
1250                    self.catalog.push(GrammarEntry {
1251                        display_name: lang_id.clone(),
1252                        language_id: lang_id.clone(),
1253                        short_name: None,
1254                        extensions: Vec::new(),
1255                        filenames: Vec::new(),
1256                        filename_globs: Vec::new(),
1257                        source: GrammarSource::BuiltIn,
1258                        engines: GrammarEngines::default(),
1259                    });
1260                    idx
1261                });
1262
1263            // Always index the config key so `find_by_name("mylang")` resolves
1264            // even when `mylang` aliases an existing grammar (e.g.
1265            // `[languages.mylang] grammar = "Rust"`). `or_insert` preserves
1266            // any existing mapping — won't clobber the canonical entry.
1267            self.catalog_by_name
1268                .entry(lang_id.to_lowercase())
1269                .or_insert(idx);
1270
1271            for ext in &lang_cfg.extensions {
1272                if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1273                    self.catalog[idx].extensions.push(ext.clone());
1274                }
1275                // Config-declared extensions override any previous mapping.
1276                self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1277            }
1278            for filename in &lang_cfg.filenames {
1279                if is_glob_pattern(filename) {
1280                    if !self.catalog[idx]
1281                        .filename_globs
1282                        .iter()
1283                        .any(|f| f == filename)
1284                    {
1285                        self.catalog[idx].filename_globs.push(filename.clone());
1286                    }
1287                } else {
1288                    if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1289                        self.catalog[idx].filenames.push(filename.clone());
1290                    }
1291                    self.catalog_by_filename.insert(filename.clone(), idx);
1292                }
1293            }
1294        }
1295    }
1296
    /// Borrow the shared syntect `SyntaxSet` backing this registry.
    ///
    /// Returns a reference to the `Arc`, so no reference count is touched;
    /// use `syntax_set_arc` when an owned handle is needed.
    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
        &self.syntax_set
    }
1301
1302    /// Get a clone of the Arc for sharing
1303    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1304        Arc::clone(&self.syntax_set)
1305    }
1306
1307    /// List all available syntax names
1308    pub fn available_syntaxes(&self) -> Vec<&str> {
1309        self.syntax_set
1310            .syntaxes()
1311            .iter()
1312            .map(|s| s.name.as_str())
1313            .collect()
1314    }
1315
1316    /// List all available grammars with provenance information.
1317    ///
1318    /// Returns a sorted list of `GrammarInfo` entries derived from the unified
1319    /// catalog — this includes both syntect grammars and tree-sitter-only
1320    /// languages (like TypeScript). Each entry is listed exactly once even
1321    /// when both engines can serve it.
1322    pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1323        let mut result: Vec<GrammarInfo> = self
1324            .catalog
1325            .iter()
1326            .map(|entry| GrammarInfo {
1327                name: entry.display_name.clone(),
1328                source: entry.source.clone(),
1329                file_extensions: entry.extensions.clone(),
1330                short_name: entry.short_name.clone(),
1331            })
1332            .collect();
1333        result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1334        result
1335    }
1336
    /// Borrow the grammar provenance map.
    ///
    /// Keys are grammar names as spelled in the syntax set; values carry the
    /// grammar's source and file extensions.
    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
        &self.grammar_sources
    }
1341
1342    /// Build grammar source info from a pre-compiled syntax set.
1343    ///
1344    /// All grammars in the packdump (syntect defaults + embedded) are tagged as built-in.
1345    pub(crate) fn build_grammar_sources_from_syntax_set(
1346        syntax_set: &SyntaxSet,
1347    ) -> HashMap<String, GrammarInfo> {
1348        let mut sources = HashMap::new();
1349        for syntax in syntax_set.syntaxes() {
1350            sources.insert(
1351                syntax.name.clone(),
1352                GrammarInfo {
1353                    name: syntax.name.clone(),
1354                    source: GrammarSource::BuiltIn,
1355                    file_extensions: syntax.file_extensions.clone(),
1356                    short_name: None,
1357                },
1358            );
1359        }
1360        sources
1361    }
1362
    /// Borrow the user extensions mapping (extension -> scope name).
    ///
    /// Test-only accessor: compiled only under `cfg(test)`.
    #[cfg(test)]
    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
        &self.user_extensions
    }
1368
    /// Borrow the specs of the grammar files loaded so far (for deduplication
    /// in flush_pending_grammars).
    ///
    /// Test-only accessor: compiled only under `cfg(test)`.
    #[cfg(test)]
    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
        &self.loaded_grammar_paths
    }
1374
1375    /// Create a new registry with additional grammar files
1376    ///
1377    /// This builds a new GrammarRegistry that includes all grammars from
1378    /// the base registry plus the additional grammars specified.
1379    /// Uses the base registry's syntax_set as the builder base, preserving
1380    /// all existing grammars (user grammars, language packs, etc.).
1381    ///
1382    /// # Arguments
1383    /// * `base` - The base registry to extend
1384    /// * `additional` - List of (language, path, extensions) tuples for new grammars
1385    ///
1386    /// # Returns
1387    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
1388    pub fn with_additional_grammars(
1389        base: &GrammarRegistry,
1390        additional: &[GrammarSpec],
1391    ) -> Option<Self> {
1392        tracing::info!(
1393            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1394            additional.len(),
1395            base.syntax_set.syntaxes().len()
1396        );
1397
1398        // Use the base registry's syntax_set as builder base — this preserves
1399        // ALL existing grammars (defaults, embedded, user, language packs)
1400        // without needing to reload them from disk.
1401        let mut builder = (*base.syntax_set).clone().into_builder();
1402
1403        // Preserve existing user extensions and add new ones
1404        let mut user_extensions = base.user_extensions.clone();
1405
1406        // Track loaded grammar paths (existing + new)
1407        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1408
1409        // Preserve existing grammar sources
1410        let mut grammar_sources = base.grammar_sources.clone();
1411
1412        // Add each new grammar
1413        for spec in additional {
1414            tracing::info!(
1415                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1416                spec.language,
1417                spec.path,
1418                spec.extensions
1419            );
1420            match Self::load_grammar_file(&spec.path) {
1421                Ok(syntax) => {
1422                    let scope = syntax.scope.to_string();
1423                    let syntax_name = syntax.name.clone();
1424                    tracing::info!(
1425                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1426                        syntax_name,
1427                        scope
1428                    );
1429                    builder.add(syntax);
1430                    tracing::info!(
1431                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
1432                        spec.language,
1433                        spec.path,
1434                        spec.extensions
1435                    );
1436                    // Register extensions for this grammar
1437                    for ext in &spec.extensions {
1438                        user_extensions.insert(ext.clone(), scope.clone());
1439                    }
1440                    // Track provenance
1441                    grammar_sources.insert(
1442                        syntax_name.clone(),
1443                        GrammarInfo {
1444                            name: syntax_name,
1445                            source: GrammarSource::Plugin {
1446                                plugin: spec.language.clone(),
1447                                path: spec.path.clone(),
1448                            },
1449                            file_extensions: spec.extensions.clone(),
1450                            short_name: None,
1451                        },
1452                    );
1453                    // Track this grammar path for future reloads
1454                    loaded_grammar_paths.push(spec.clone());
1455                }
1456                Err(e) => {
1457                    tracing::warn!(
1458                        "Failed to load grammar for '{}' from {:?}: {}",
1459                        spec.language,
1460                        spec.path,
1461                        e
1462                    );
1463                }
1464            }
1465        }
1466
1467        let mut reg = Self {
1468            syntax_set: Arc::new(builder.build()),
1469            user_extensions,
1470            filename_scopes: base.filename_scopes.clone(),
1471            loaded_grammar_paths,
1472            grammar_sources,
1473            aliases: base.aliases.clone(),
1474            catalog: Vec::new(),
1475            catalog_by_name: HashMap::new(),
1476            catalog_by_extension: HashMap::new(),
1477            catalog_by_filename: HashMap::new(),
1478            applied_language_config: HashMap::new(),
1479            catalog_gen: 0,
1480        };
1481        reg.rebuild_catalog();
1482        Some(reg)
1483    }
1484
1485    /// Load a grammar file from disk
1486    ///
1487    /// Only Sublime Text (.sublime-syntax) format is supported.
1488    /// TextMate (.tmLanguage) grammars use a completely different format
1489    /// and cannot be loaded by syntect's yaml-load feature.
1490    pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1491        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1492
1493        match ext {
1494            "sublime-syntax" => {
1495                let content = std::fs::read_to_string(path)
1496                    .map_err(|e| format!("Failed to read file: {}", e))?;
1497                SyntaxDefinition::load_from_str(
1498                    &content,
1499                    true,
1500                    path.file_stem().and_then(|s| s.to_str()),
1501                )
1502                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1503            }
1504            _ => Err(format!(
1505                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1506                ext
1507            )),
1508        }
1509    }
1510}
1511
1512impl Default for GrammarRegistry {
1513    fn default() -> Self {
1514        // Create with defaults and embedded grammars only (no user grammars)
1515        let defaults = SyntaxSet::load_defaults_newlines();
1516        let mut builder = defaults.into_builder();
1517        Self::add_embedded_grammars(&mut builder);
1518        let syntax_set = builder.build();
1519        let filename_scopes = Self::build_filename_scopes();
1520        let extra_extensions = Self::build_extra_extensions();
1521
1522        let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1523        registry.populate_built_in_aliases();
1524        registry.rebuild_catalog();
1525        registry
1526    }
1527}
1528
1529// VSCode package.json structures for parsing grammar manifests
1530
/// Subset of a VSCode `package.json` manifest used when parsing grammar manifests.
///
/// Only the `contributes` key is modelled here.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// The `contributes` section; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1536
/// The `contributes` section of a manifest: declared languages and grammars.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations; an empty list when the key is missing.
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations; an empty list when the key is missing.
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1544
/// One entry of the manifest's `languages` list.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier; required in the manifest.
    pub id: String,
    /// File extensions declared for the language; empty when the key is missing.
    // NOTE(review): presumably written with a leading dot, as in VSCode manifests — confirm against the consumer.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1551
/// One entry of the manifest's `grammars` list.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language the grammar is declared for.
    // NOTE(review): presumably matches a `LanguageContribution::id` — confirm against the consumer.
    pub language: String,
    /// Scope name, read from the manifest's `scopeName` key.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file, as written in the manifest.
    pub path: String,
}
1559
1560#[cfg(test)]
1561mod tests {
1562    use super::*;
1563
1564    #[test]
1565    fn test_empty_registry() {
1566        let registry = GrammarRegistry::empty();
1567        // Should have at least plain text
1568        assert!(!registry.available_syntaxes().is_empty());
1569    }
1570
1571    #[test]
1572    fn test_default_registry() {
1573        let registry = GrammarRegistry::default();
1574        // Should have built-in syntaxes
1575        assert!(!registry.available_syntaxes().is_empty());
1576    }
1577
1578    #[test]
1579    fn test_find_syntax_for_common_extensions() {
1580        let registry = GrammarRegistry::default();
1581
1582        // Test common extensions that resolve to a syntect (TextMate) grammar
1583        // via the catalog. JavaScript is intentionally NOT here — it is routed
1584        // exclusively to tree-sitter (issue #899) and so has no catalog-level
1585        // syntect entry. Code-block highlighting in popups still finds the
1586        // syntect JS grammar through `SyntaxSet::find_syntax_by_token`, which
1587        // bypasses the catalog.
1588        let test_cases = [
1589            ("test.py", true),
1590            ("test.rs", true),
1591            ("test.js", false),
1592            ("test.json", true),
1593            ("test.md", true),
1594            ("test.html", true),
1595            ("test.css", true),
1596            ("test.gd", true),
1597            ("test.unknown_extension_xyz", false),
1598        ];
1599
1600        for (filename, should_exist) in test_cases {
1601            let path = Path::new(filename);
1602            let result = registry.find_syntax_for_file(path);
1603            assert_eq!(
1604                result.is_some(),
1605                should_exist,
1606                "Expected {:?} for {}",
1607                should_exist,
1608                filename
1609            );
1610        }
1611    }
1612
1613    #[test]
1614    fn test_racket_grammar_loaded() {
1615        let registry = GrammarRegistry::default();
1616        for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1617            let result = registry.find_syntax_for_file(Path::new(filename));
1618            assert!(
1619                result.is_some(),
1620                "Racket grammar should be available for {}",
1621                filename
1622            );
1623            let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1624            assert_eq!(entry.display_name, "Racket", "for {}", filename);
1625        }
1626    }
1627
1628    #[test]
1629    fn test_syntax_set_arc() {
1630        let registry = GrammarRegistry::default();
1631        let arc1 = registry.syntax_set_arc();
1632        let arc2 = registry.syntax_set_arc();
1633        // Both should point to the same data
1634        assert!(Arc::ptr_eq(&arc1, &arc2));
1635    }
1636
1637    #[test]
1638    fn test_shell_dotfiles_detection() {
1639        let registry = GrammarRegistry::default();
1640
1641        // All these should be detected as shell scripts
1642        let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1643
1644        for filename in shell_files {
1645            let path = Path::new(filename);
1646            let result = registry.find_syntax_for_file(path);
1647            assert!(
1648                result.is_some(),
1649                "{} should be detected as a syntax",
1650                filename
1651            );
1652            let syntax = result.unwrap();
1653            // Should be detected as Bash/Shell
1654            assert!(
1655                syntax.name.to_lowercase().contains("bash")
1656                    || syntax.name.to_lowercase().contains("shell"),
1657                "{} should be detected as shell/bash, got: {}",
1658                filename,
1659                syntax.name
1660            );
1661        }
1662    }
1663
1664    #[test]
1665    fn test_pkgbuild_detection() {
1666        let registry = GrammarRegistry::default();
1667
1668        // PKGBUILD and APKBUILD should be detected as shell scripts
1669        for filename in ["PKGBUILD", "APKBUILD"] {
1670            let path = Path::new(filename);
1671            let result = registry.find_syntax_for_file(path);
1672            assert!(
1673                result.is_some(),
1674                "{} should be detected as a syntax",
1675                filename
1676            );
1677            let syntax = result.unwrap();
1678            // Should be detected as Bash/Shell
1679            assert!(
1680                syntax.name.to_lowercase().contains("bash")
1681                    || syntax.name.to_lowercase().contains("shell"),
1682                "{} should be detected as shell/bash, got: {}",
1683                filename,
1684                syntax.name
1685            );
1686        }
1687    }
1688
1689    #[test]
1690    fn test_find_syntax_with_glob_filenames() {
1691        let mut registry = GrammarRegistry::default();
1692        let mut languages = std::collections::HashMap::new();
1693        languages.insert(
1694            "shell-configs".to_string(),
1695            crate::config::LanguageConfig {
1696                extensions: vec!["sh".to_string()],
1697                filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1698                grammar: "bash".to_string(),
1699                comment_prefix: Some("#".to_string()),
1700                auto_indent: true,
1701                auto_close: None,
1702                auto_surround: None,
1703                textmate_grammar: None,
1704                show_whitespace_tabs: true,
1705                line_wrap: None,
1706                wrap_column: None,
1707                page_view: None,
1708                page_width: None,
1709                use_tabs: None,
1710                tab_size: None,
1711                formatter: None,
1712                format_on_save: false,
1713                on_save: vec![],
1714                word_characters: None,
1715                indent: None,
1716            },
1717        );
1718        registry.apply_language_config(&languages);
1719
1720        assert!(
1721            registry
1722                .find_by_path(Path::new("nftables.conf"), None)
1723                .is_some(),
1724            "*.conf should match nftables.conf"
1725        );
1726        assert!(
1727            registry.find_by_path(Path::new("lfrc"), None).is_some(),
1728            "*rc should match lfrc"
1729        );
1730        // Unrelated file shouldn't panic.
1731        let _ = registry.find_by_path(Path::new("randomfile"), None);
1732    }
1733
1734    #[test]
1735    fn test_find_syntax_with_path_glob_filenames() {
1736        let mut registry = GrammarRegistry::default();
1737        let mut languages = std::collections::HashMap::new();
1738        languages.insert(
1739            "shell-configs".to_string(),
1740            crate::config::LanguageConfig {
1741                extensions: vec!["sh".to_string()],
1742                filenames: vec!["/etc/**/rc.*".to_string()],
1743                grammar: "bash".to_string(),
1744                comment_prefix: Some("#".to_string()),
1745                auto_indent: true,
1746                auto_close: None,
1747                auto_surround: None,
1748                textmate_grammar: None,
1749                show_whitespace_tabs: true,
1750                line_wrap: None,
1751                wrap_column: None,
1752                page_view: None,
1753                page_width: None,
1754                use_tabs: None,
1755                tab_size: None,
1756                formatter: None,
1757                format_on_save: false,
1758                on_save: vec![],
1759                word_characters: None,
1760                indent: None,
1761            },
1762        );
1763        registry.apply_language_config(&languages);
1764
1765        assert!(
1766            registry
1767                .find_by_path(Path::new("/etc/rc.conf"), None)
1768                .is_some(),
1769            "/etc/**/rc.* should match /etc/rc.conf"
1770        );
1771        assert!(
1772            registry
1773                .find_by_path(Path::new("/etc/init/rc.local"), None)
1774                .is_some(),
1775            "/etc/**/rc.* should match /etc/init/rc.local"
1776        );
1777        let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1778    }
1779
1780    #[test]
1781    fn test_exact_filename_takes_priority_over_glob() {
1782        let mut registry = GrammarRegistry::default();
1783        let mut languages = std::collections::HashMap::new();
1784
1785        // A language with exact filename "lfrc" -> python grammar
1786        languages.insert(
1787            "custom-lfrc".to_string(),
1788            crate::config::LanguageConfig {
1789                extensions: vec![],
1790                filenames: vec!["lfrc".to_string()],
1791                grammar: "python".to_string(),
1792                comment_prefix: Some("#".to_string()),
1793                auto_indent: true,
1794                auto_close: None,
1795                auto_surround: None,
1796                textmate_grammar: None,
1797                show_whitespace_tabs: true,
1798                line_wrap: None,
1799                wrap_column: None,
1800                page_view: None,
1801                page_width: None,
1802                use_tabs: None,
1803                tab_size: None,
1804                formatter: None,
1805                format_on_save: false,
1806                on_save: vec![],
1807                word_characters: None,
1808                indent: None,
1809            },
1810        );
1811
1812        // A language with glob "*rc" -> bash grammar
1813        languages.insert(
1814            "rc-files".to_string(),
1815            crate::config::LanguageConfig {
1816                extensions: vec![],
1817                filenames: vec!["*rc".to_string()],
1818                grammar: "bash".to_string(),
1819                comment_prefix: Some("#".to_string()),
1820                auto_indent: true,
1821                auto_close: None,
1822                auto_surround: None,
1823                textmate_grammar: None,
1824                show_whitespace_tabs: true,
1825                line_wrap: None,
1826                wrap_column: None,
1827                page_view: None,
1828                page_width: None,
1829                use_tabs: None,
1830                tab_size: None,
1831                formatter: None,
1832                format_on_save: false,
1833                on_save: vec![],
1834                word_characters: None,
1835                indent: None,
1836            },
1837        );
1838
1839        registry.apply_language_config(&languages);
1840
1841        // "lfrc" should match the exact rule (python), not the glob (bash)
1842        let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1843        assert!(
1844            entry.display_name.to_lowercase().contains("python"),
1845            "exact match should win over glob, got: {}",
1846            entry.display_name
1847        );
1848    }
1849
1850    #[test]
1851    fn test_built_in_aliases_resolve() {
1852        let registry = GrammarRegistry::default();
1853
1854        // "bash" should resolve to "Bourne Again Shell (bash)" via alias
1855        let syntax = registry.find_syntax_by_name("bash");
1856        assert!(syntax.is_some(), "alias 'bash' should resolve");
1857        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1858
1859        // "cpp" should resolve to "C++"
1860        let syntax = registry.find_syntax_by_name("cpp");
1861        assert!(syntax.is_some(), "alias 'cpp' should resolve");
1862        assert_eq!(syntax.unwrap().name, "C++");
1863
1864        // "csharp" should resolve to "C#"
1865        let syntax = registry.find_syntax_by_name("csharp");
1866        assert!(syntax.is_some(), "alias 'csharp' should resolve");
1867        assert_eq!(syntax.unwrap().name, "C#");
1868
1869        // "sh" should also resolve to bash
1870        let syntax = registry.find_syntax_by_name("sh");
1871        assert!(syntax.is_some(), "alias 'sh' should resolve");
1872        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1873
1874        // "proto" should resolve to "Protocol Buffers"
1875        let syntax = registry.find_syntax_by_name("proto");
1876        assert!(syntax.is_some(), "alias 'proto' should resolve");
1877        assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1878    }
1879
1880    #[test]
1881    fn test_alias_case_insensitive_input() {
1882        let registry = GrammarRegistry::default();
1883
1884        // Aliases should be case-insensitive on input
1885        let syntax = registry.find_syntax_by_name("BASH");
1886        assert!(
1887            syntax.is_some(),
1888            "alias 'BASH' should resolve case-insensitively"
1889        );
1890        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1891
1892        let syntax = registry.find_syntax_by_name("Cpp");
1893        assert!(
1894            syntax.is_some(),
1895            "alias 'Cpp' should resolve case-insensitively"
1896        );
1897        assert_eq!(syntax.unwrap().name, "C++");
1898    }
1899
1900    #[test]
1901    fn test_full_name_still_works() {
1902        let registry = GrammarRegistry::default();
1903
1904        // Full names should still work (exact match)
1905        let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1906        assert!(syntax.is_some(), "full name should still resolve");
1907        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1908
1909        // Case-insensitive full name should still work
1910        let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1911        assert!(
1912            syntax.is_some(),
1913            "case-insensitive full name should resolve"
1914        );
1915        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1916    }
1917
1918    #[test]
1919    fn test_alias_does_not_shadow_full_names() {
1920        let registry = GrammarRegistry::default();
1921
1922        // "Rust" should resolve directly via case-insensitive match, not via alias
1923        let syntax = registry.find_syntax_by_name("rust");
1924        assert!(syntax.is_some());
1925        assert_eq!(syntax.unwrap().name, "Rust");
1926
1927        // "Go" should resolve directly
1928        let syntax = registry.find_syntax_by_name("go");
1929        assert!(syntax.is_some());
1930        assert_eq!(syntax.unwrap().name, "Go");
1931    }
1932
1933    #[test]
1934    fn test_register_alias_rejects_collision() {
1935        let mut registry = GrammarRegistry::default();
1936
1937        // Trying to register an alias that maps to two different targets should fail
1938        assert!(registry.register_alias("myalias", "Rust"));
1939        assert!(!registry.register_alias("myalias", "Go"));
1940
1941        // Same mapping is fine (idempotent)
1942        assert!(registry.register_alias("myalias", "Rust"));
1943    }
1944
1945    #[test]
1946    fn test_register_alias_rejects_nonexistent_target() {
1947        let mut registry = GrammarRegistry::default();
1948        assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1949    }
1950
1951    #[test]
1952    fn test_register_alias_skips_existing_grammar_name() {
1953        let mut registry = GrammarRegistry::default();
1954
1955        // "rust" case-insensitively matches the grammar "Rust", so no alias needed
1956        assert!(!registry.register_alias("rust", "Rust"));
1957        // Should still be resolvable via case-insensitive match
1958        assert!(registry.find_syntax_by_name("rust").is_some());
1959    }
1960
1961    #[test]
1962    fn test_available_grammar_info_includes_short_names() {
1963        let registry = GrammarRegistry::default();
1964        let infos = registry.available_grammar_info();
1965
1966        let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1967        assert!(bash_info.is_some(), "bash grammar should be in the list");
1968        let bash_info = bash_info.unwrap();
1969        assert!(
1970            bash_info.short_name.is_some(),
1971            "bash grammar should have a short_name"
1972        );
1973        // The shortest alias for bash is "sh"
1974        assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1975    }
1976
1977    #[test]
1978    fn test_catalog_contains_each_language_once() {
1979        let registry = GrammarRegistry::default();
1980        let catalog = registry.catalog();
1981
1982        // Every catalog entry must have a unique (case-insensitive) display name.
1983        let mut seen = std::collections::HashSet::new();
1984        for entry in catalog {
1985            let key = entry.display_name.to_lowercase();
1986            assert!(
1987                seen.insert(key.clone()),
1988                "duplicate catalog entry for display_name={:?}",
1989                entry.display_name
1990            );
1991        }
1992
1993        // TypeScript is tree-sitter-only (syntect ships no grammar for it) yet
1994        // must still appear in the catalog.
1995        let ts = registry
1996            .find_by_name("TypeScript")
1997            .expect("TypeScript must be in the catalog");
1998        assert!(ts.engines.syntect.is_none());
1999        assert_eq!(
2000            ts.engines.tree_sitter,
2001            Some(fresh_languages::Language::TypeScript)
2002        );
2003        assert_eq!(ts.language_id, "typescript");
2004        assert!(ts.extensions.iter().any(|e| e == "ts"));
2005
2006        // Languages that exist in both syntect and tree-sitter (Rust, Python)
2007        // must appear exactly once and prefer the syntect engine.
2008        for name in ["Rust", "Python"] {
2009            let entry = registry
2010                .find_by_name(name)
2011                .unwrap_or_else(|| panic!("{} must be in the catalog", name));
2012            assert!(
2013                entry.engines.syntect.is_some(),
2014                "{} should have a syntect index",
2015                name
2016            );
2017            assert!(
2018                entry.engines.tree_sitter.is_some(),
2019                "{} should also have a tree-sitter language",
2020                name
2021            );
2022            // Only one entry with this display name (already checked above),
2023            // but also verify language_id lookup lands on the same entry.
2024            let by_id = registry
2025                .find_by_name(&entry.language_id)
2026                .expect("language_id should resolve");
2027            assert_eq!(by_id.display_name, entry.display_name);
2028        }
2029
2030        // JavaScript is deliberately routed to tree-sitter only — the
2031        // bundled syntect JavaScript grammar mishandles certain template
2032        // literals and bleeds string state into the rest of the file
2033        // (issue #899). The catalog must therefore expose a tree-sitter-only
2034        // entry, even though syntect ships a JavaScript grammar.
2035        let js = registry
2036            .find_by_name("JavaScript")
2037            .expect("JavaScript must be in the catalog");
2038        assert!(
2039            js.engines.syntect.is_none(),
2040            "JavaScript must not be routed to the syntect engine (issue #899)"
2041        );
2042        assert_eq!(
2043            js.engines.tree_sitter,
2044            Some(fresh_languages::Language::JavaScript),
2045            "JavaScript must carry the tree-sitter language"
2046        );
2047
2048        let gdscript = registry
2049            .find_by_path(Path::new("player.gd"), None)
2050            .expect("player.gd should resolve to GDScript");
2051        assert_eq!(gdscript.display_name, "GDScript");
2052        assert_eq!(gdscript.language_id, "gdscript");
2053        assert!(
2054            gdscript.engines.syntect.is_some(),
2055            "GDScript should use the embedded Syntect grammar"
2056        );
2057        assert!(
2058            gdscript.engines.tree_sitter.is_none(),
2059            "GDScript must not carry a tree-sitter parser"
2060        );
2061    }
2062
2063    #[test]
2064    fn test_catalog_find_by_path_and_extension() {
2065        let registry = GrammarRegistry::default();
2066        let ts = registry
2067            .find_by_path(Path::new("foo.ts"), None)
2068            .expect("foo.ts should resolve");
2069        assert_eq!(ts.display_name, "TypeScript");
2070        let rs = registry.find_by_extension("rs").expect("rs should resolve");
2071        assert_eq!(rs.display_name, "Rust");
2072    }
2073
2074    /// Build a minimal LanguageConfig for tests.
2075    fn lang_cfg(
2076        grammar: &str,
2077        extensions: &[&str],
2078        filenames: &[&str],
2079    ) -> crate::config::LanguageConfig {
2080        crate::config::LanguageConfig {
2081            extensions: extensions.iter().map(|s| s.to_string()).collect(),
2082            filenames: filenames.iter().map(|s| s.to_string()).collect(),
2083            grammar: grammar.to_string(),
2084            comment_prefix: None,
2085            auto_indent: true,
2086            auto_close: None,
2087            auto_surround: None,
2088            textmate_grammar: None,
2089            show_whitespace_tabs: true,
2090            line_wrap: None,
2091            wrap_column: None,
2092            page_view: None,
2093            page_width: None,
2094            use_tabs: None,
2095            tab_size: None,
2096            formatter: None,
2097            format_on_save: false,
2098            on_save: vec![],
2099            word_characters: None,
2100            indent: None,
2101        }
2102    }
2103
2104    /// Bug #1: a user-declared config key that aliases an existing grammar
2105    /// (e.g. `[languages.mylang] grammar = "Rust"`) must resolve via
2106    /// `find_by_name("mylang")` so the language palette can select it.
2107    #[test]
2108    fn test_user_alias_resolves_via_find_by_name() {
2109        let mut registry = GrammarRegistry::default();
2110        let mut languages = std::collections::HashMap::new();
2111        languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2112        registry.apply_language_config(&languages);
2113
2114        let entry = registry
2115            .find_by_name("mylang")
2116            .expect("user-declared alias 'mylang' must resolve");
2117        assert_eq!(entry.display_name, "Rust");
2118    }
2119
2120    /// Bug #2: `register_alias` used to rebuild the catalog from scratch,
2121    /// wiping out everything `apply_language_config` had merged. Registering
2122    /// an alias afterwards must not lose user config.
2123    #[test]
2124    fn test_register_alias_preserves_applied_language_config() {
2125        let mut registry = GrammarRegistry::default();
2126        let mut languages = std::collections::HashMap::new();
2127        languages.insert(
2128            "shell-configs".to_string(),
2129            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2130        );
2131        registry.apply_language_config(&languages);
2132
2133        // Sanity: config applied.
2134        assert!(registry.find_by_extension("myconf").is_some());
2135        assert!(
2136            registry
2137                .find_by_path(Path::new("foo.myconf"), None)
2138                .is_some(),
2139            "glob should match before register_alias"
2140        );
2141
2142        // Registering an alias must not erase the config we just applied.
2143        registry.register_alias("mycustom", "Rust");
2144
2145        assert!(
2146            registry.find_by_extension("myconf").is_some(),
2147            "config extension must survive register_alias"
2148        );
2149        assert!(
2150            registry
2151                .find_by_path(Path::new("foo.myconf"), None)
2152                .is_some(),
2153            "glob must survive register_alias"
2154        );
2155    }
2156
2157    /// Bug #4: `from_syntax_name` used to unconditionally overwrite the
2158    /// catalog's canonical display name with whatever the user typed (e.g.
2159    /// "BASH") — that string ended up in the status bar.
2160    #[test]
2161    fn test_from_syntax_name_preserves_canonical_display_name() {
2162        use crate::primitives::detected_language::DetectedLanguage;
2163        let registry = GrammarRegistry::default();
2164        let languages = std::collections::HashMap::new();
2165
2166        let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
2167            .expect("BASH should resolve via alias");
2168        assert_eq!(
2169            detected.display_name, "Bourne Again Shell (bash)",
2170            "display_name must be canonical, not user-typed"
2171        );
2172    }
2173
2174    /// A config-only language (no matching syntect grammar) must still appear
2175    /// in the catalog so the language palette can offer it — the old
2176    /// `DetectedLanguage::from_config_language` branch was load-bearing.
2177    #[test]
2178    fn test_config_only_language_appears_in_catalog() {
2179        let mut registry = GrammarRegistry::default();
2180        let mut languages = std::collections::HashMap::new();
2181        // "fish" isn't in syntect; grammar="fish" doesn't resolve either.
2182        languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2183        registry.apply_language_config(&languages);
2184
2185        let entry = registry
2186            .find_by_name("fish")
2187            .expect("fish should be in the catalog after apply_language_config");
2188        assert!(entry.engines.syntect.is_none());
2189        assert!(entry.engines.tree_sitter.is_none());
2190        assert_eq!(entry.language_id, "fish");
2191        assert!(entry.extensions.iter().any(|e| e == "fish"));
2192    }
2193
2194    /// Config-declared extensions must override the built-in mapping. If the
2195    /// user says `[languages.typescript-overlay] extensions = ["js"] grammar
2196    /// = "TypeScript"`, then `foo.js` must resolve to TypeScript, not
2197    /// JavaScript.
2198    #[test]
2199    fn test_config_extension_overrides_builtin() {
2200        let mut registry = GrammarRegistry::default();
2201        // Sanity: default mapping is JavaScript.
2202        assert_eq!(
2203            registry.find_by_extension("js").unwrap().display_name,
2204            "JavaScript"
2205        );
2206
2207        let mut languages = std::collections::HashMap::new();
2208        languages.insert(
2209            "ts-overlay".to_string(),
2210            lang_cfg("TypeScript", &["js"], &[]),
2211        );
2212        registry.apply_language_config(&languages);
2213
2214        assert_eq!(
2215            registry.find_by_extension("js").unwrap().display_name,
2216            "TypeScript",
2217            "user-config extension must win over built-in"
2218        );
2219    }
2220
2221    /// Bare filenames listed by syntect grammars (e.g. "Gemfile", "Makefile",
2222    /// "Rakefile") must resolve through `find_by_path`. Syntect stores these
2223    /// in each grammar's `file_extensions` field alongside real extensions
2224    /// like "rb"; its own `find_syntax_for_file` treats them as either. The
2225    /// catalog has to do the same or `HighlightEngine::for_file` breaks for
2226    /// every extensionless config file.
2227    #[test]
2228    fn test_bare_filename_resolves_via_find_by_path() {
2229        let registry = GrammarRegistry::default();
2230        for (filename, expected_substr) in [
2231            ("Gemfile", "ruby"),
2232            ("Rakefile", "ruby"),
2233            ("Vagrantfile", "ruby"),
2234            ("Makefile", "makefile"),
2235            ("GNUmakefile", "makefile"),
2236        ] {
2237            let entry = registry
2238                .find_by_path(Path::new(filename), None)
2239                .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2240            assert!(
2241                entry.display_name.to_lowercase().contains(expected_substr),
2242                "{} should resolve to {} grammar, got {}",
2243                filename,
2244                expected_substr,
2245                entry.display_name
2246            );
2247        }
2248    }
2249
2250    /// Languages that have both syntect and tree-sitter (e.g. JavaScript) must
2251    /// expose the union of both engines' extensions. Tree-sitter-javascript
2252    /// knows `.jsx`; syntect's JavaScript grammar does not. Both should route
2253    /// through the JavaScript catalog entry.
2254    #[test]
2255    fn test_jsx_resolves_to_javascript() {
2256        let registry = GrammarRegistry::default();
2257        let entry = registry
2258            .find_by_path(Path::new("foo.jsx"), None)
2259            .expect("foo.jsx must resolve");
2260        assert_eq!(entry.display_name, "JavaScript");
2261    }
2262
2263    /// `rebuild_catalog` must replay the last-applied language config so it
2264    /// can never silently wipe user `[languages]` rules. This is the invariant
2265    /// that keeps `register_alias`, `populate_built_in_aliases`, and any
2266    /// future rebuild callsite safe-by-construction.
2267    #[test]
2268    fn test_rebuild_catalog_replays_language_config() {
2269        let mut registry = GrammarRegistry::default();
2270        let mut languages = std::collections::HashMap::new();
2271        languages.insert(
2272            "myshell".to_string(),
2273            lang_cfg("bash", &["myext"], &["*.myglob"]),
2274        );
2275        registry.apply_language_config(&languages);
2276        assert!(registry.find_by_extension("myext").is_some());
2277        assert!(registry
2278            .find_by_path(Path::new("foo.myglob"), None)
2279            .is_some());
2280
2281        // Force a rebuild — the catalog gets wiped and re-populated from
2282        // syntect / tree-sitter, but user config must come back on top.
2283        registry.rebuild_catalog();
2284        assert!(
2285            registry.find_by_extension("myext").is_some(),
2286            "rebuild_catalog must replay applied user config"
2287        );
2288        assert!(
2289            registry
2290                .find_by_path(Path::new("foo.myglob"), None)
2291                .is_some(),
2292            "rebuild_catalog must replay user globs"
2293        );
2294    }
2295
2296    /// `apply_language_config` must be idempotent: calling it twice with the
2297    /// same config yields the same catalog state.
2298    #[test]
2299    fn test_apply_language_config_idempotent() {
2300        let mut registry = GrammarRegistry::default();
2301        let mut languages = std::collections::HashMap::new();
2302        languages.insert(
2303            "shell-cfg".to_string(),
2304            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2305        );
2306
2307        registry.apply_language_config(&languages);
2308        let first_extensions = registry
2309            .find_by_name("bash")
2310            .unwrap()
2311            .extensions
2312            .iter()
2313            .filter(|e| e == &"myconf")
2314            .count();
2315        let first_globs = registry
2316            .find_by_name("bash")
2317            .unwrap()
2318            .filename_globs
2319            .iter()
2320            .filter(|g| g == &"*.myconf")
2321            .count();
2322        assert_eq!(first_extensions, 1);
2323        assert_eq!(first_globs, 1);
2324
2325        // Second call must not duplicate anything.
2326        registry.apply_language_config(&languages);
2327        let second_extensions = registry
2328            .find_by_name("bash")
2329            .unwrap()
2330            .extensions
2331            .iter()
2332            .filter(|e| e == &"myconf")
2333            .count();
2334        let second_globs = registry
2335            .find_by_name("bash")
2336            .unwrap()
2337            .filename_globs
2338            .iter()
2339            .filter(|g| g == &"*.myconf")
2340            .count();
2341        assert_eq!(second_extensions, 1, "extensions must not duplicate");
2342        assert_eq!(second_globs, 1, "globs must not duplicate");
2343    }
2344
2345    /// Julia: a single-quote after an identifier is the adjoint
2346    /// (conjugate-transpose) postfix operator, not the start of a string. The
2347    /// old grammar pushed a string context on every `'`, so `A'` swallowed
2348    /// the rest of the file until the next quote — wrecking highlighting for
2349    /// any subsequent keyword. Issue #1852.
2350    #[test]
2351    fn test_julia_adjoint_does_not_start_string() {
2352        use syntect::parsing::{ParseState, ScopeStack};
2353
2354        let registry = GrammarRegistry::default();
2355        let syntax_set = registry.syntax_set();
2356        let syntax = registry
2357            .find_syntax_by_name("Julia")
2358            .expect("Julia grammar must be loaded");
2359        let mut state = ParseState::new(syntax);
2360        let mut scopes = ScopeStack::new();
2361
2362        // Adjoint operator followed by code on later lines.
2363        let lines = ["x = A'\n", "function foo()\n", "end\n"];
2364        let mut keyword_line_in_string = false;
2365        let mut found_function_keyword = false;
2366
2367        for line in &lines {
2368            let ops = state.parse_line(line, syntax_set).unwrap();
2369            // Walk byte-by-byte, applying ops as we pass their offset.
2370            let mut op_iter = ops.iter().peekable();
2371            for (byte_idx, _) in line.char_indices() {
2372                while let Some((offset, op)) = op_iter.peek() {
2373                    if *offset <= byte_idx {
2374                        scopes.apply(op).unwrap();
2375                        op_iter.next();
2376                    } else {
2377                        break;
2378                    }
2379                }
2380                let in_string = scopes
2381                    .as_slice()
2382                    .iter()
2383                    .any(|s| s.build_string().starts_with("string."));
2384                let is_function_kw = line[byte_idx..].starts_with("function");
2385                if is_function_kw && in_string {
2386                    keyword_line_in_string = true;
2387                }
2388                if is_function_kw && !in_string {
2389                    found_function_keyword = true;
2390                }
2391            }
2392            // Drain remaining ops at end of line.
2393            for (_, op) in op_iter {
2394                scopes.apply(op).unwrap();
2395            }
2396        }
2397
2398        assert!(
2399            !keyword_line_in_string,
2400            "the `function` keyword after an adjoint operator must not be inside a string scope"
2401        );
2402        assert!(
2403            found_function_keyword,
2404            "test harness must have reached the `function` keyword"
2405        );
2406    }
2407
2408    /// Julia: `'a'` is a valid character literal. The grammar must still
2409    /// scope it as a constant/character so themes can color it. Issue #1852.
2410    #[test]
2411    fn test_julia_char_literal_is_recognized() {
2412        use syntect::parsing::{ParseState, ScopeStack};
2413
2414        let registry = GrammarRegistry::default();
2415        let syntax_set = registry.syntax_set();
2416        let syntax = registry
2417            .find_syntax_by_name("Julia")
2418            .expect("Julia grammar must be loaded");
2419        let mut state = ParseState::new(syntax);
2420        let mut scopes = ScopeStack::new();
2421
2422        let line = "x = 'a'\n";
2423        let ops = state.parse_line(line, syntax_set).unwrap();
2424        let mut saw_constant_or_string_at_quote = false;
2425        let mut op_iter = ops.iter().peekable();
2426        for (byte_idx, _) in line.char_indices() {
2427            while let Some((offset, op)) = op_iter.peek() {
2428                if *offset <= byte_idx {
2429                    scopes.apply(op).unwrap();
2430                    op_iter.next();
2431                } else {
2432                    break;
2433                }
2434            }
2435            if byte_idx == 5 {
2436                // position of 'a' (the char)
2437                let scoped = scopes.as_slice().iter().any(|s| {
2438                    let str = s.build_string();
2439                    str.starts_with("constant.") || str.starts_with("string.")
2440                });
2441                if scoped {
2442                    saw_constant_or_string_at_quote = true;
2443                }
2444            }
2445        }
2446        assert!(
2447            saw_constant_or_string_at_quote,
2448            "char literal 'a' must receive a constant/string scope"
2449        );
2450    }
2451
2452    /// `tree_sitter_for_syntect_name` handles the alias table + strict
2453    /// display-name match. The alias table catches syntect's verbose names;
2454    /// the strict match handles the common case.
2455    #[test]
2456    fn test_tree_sitter_bridge() {
2457        assert_eq!(
2458            tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2459            Some(fresh_languages::Language::Bash)
2460        );
2461        assert_eq!(
2462            tree_sitter_for_syntect_name("Rust"),
2463            Some(fresh_languages::Language::Rust)
2464        );
2465        assert_eq!(tree_sitter_for_syntect_name("GDScript"), None);
2466        // Must NOT fuzzy-match Nushell to Bash.
2467        assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2468        // Must NOT match arbitrary strings.
2469        assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2470    }
2471}