// fresh/primitives/grammar/types.rs

1//! Pure grammar registry types without I/O operations.
2//!
3//! This module contains the `GrammarRegistry` struct and all syntax lookup methods
4//! that don't require filesystem access. This enables WASM compatibility and easier testing.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12// Re-export glob matching utilities for use by other modules
13pub use crate::primitives::glob_match::{
14    filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// A grammar specification: language name, path to the grammar file, and the
/// file extensions associated with it.
///
/// Used to pass grammar information between the plugin layer, loader, and
/// registry without relying on anonymous tuples.
///
/// Implements `PartialEq`/`Eq` so specs can be compared directly, for example
/// in tests or when checking whether a grammar has already been recorded.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GrammarSpec {
    /// Language identifier (e.g., "elixir")
    pub language: String,
    /// Path to the grammar file (.sublime-syntax)
    pub path: PathBuf,
    /// File extensions to associate with this grammar (e.g., ["ex", "exs"])
    pub extensions: Vec<String>,
}
30
31/// Where a grammar was loaded from.
32#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
33#[serde(tag = "type")]
34pub enum GrammarSource {
35    /// Built-in to Fresh (pre-compiled syntect defaults + embedded grammars)
36    #[serde(rename = "built-in")]
37    BuiltIn,
38    /// Installed from a user grammar directory (~/.config/fresh/grammars/)
39    #[serde(rename = "user")]
40    User { path: PathBuf },
41    /// From a language pack (~/.config/fresh/languages/packages/)
42    #[serde(rename = "language-pack")]
43    LanguagePack { name: String, path: PathBuf },
44    /// From a bundle package (~/.config/fresh/bundles/packages/)
45    #[serde(rename = "bundle")]
46    Bundle { name: String, path: PathBuf },
47    /// Registered by a plugin at runtime
48    #[serde(rename = "plugin")]
49    Plugin { plugin: String, path: PathBuf },
50}
51
52impl std::fmt::Display for GrammarSource {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            GrammarSource::BuiltIn => write!(f, "built-in"),
56            GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57            GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58            GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59            GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60        }
61    }
62}
63
64/// Information about an available grammar, including its provenance.
65#[derive(Clone, Debug, Serialize, Deserialize)]
66pub struct GrammarInfo {
67    /// The grammar name as used in config files (case-insensitive matching)
68    pub name: String,
69    /// Where this grammar was loaded from
70    pub source: GrammarSource,
71    /// File extensions associated with this grammar
72    pub file_extensions: Vec<String>,
73    /// Optional short name alias (e.g., "bash" for "Bourne Again Shell (bash)")
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub short_name: Option<String>,
76}
77
78/// Bridge between syntect display names and `fresh_languages::Language`.
79///
80/// Most syntect grammars map one-to-one: "Rust" → `Language::Rust`. A few
81/// have verbose display names that don't match the tree-sitter enum's
82/// `display_name()`, and `Language::from_name` has fuzzy "contains shell"
83/// fallbacks that would wrongly tag Nushell as tree-sitter Bash. This is
84/// the one place we spell the exceptions out explicitly.
85const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
86    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88/// Resolve a syntect syntax display name to a tree-sitter language, using
89/// strict equality against the alias table and `Language::display_name()`.
90fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91    for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92        if *syntect_name == display_name {
93            return Some(*lang);
94        }
95    }
96    fresh_languages::Language::all()
97        .iter()
98        .find(|l| l.display_name() == display_name)
99        .copied()
100}
101
102/// Which highlighters can serve a given `GrammarEntry`.
103///
104/// A catalog entry may come from syntect (a TextMate grammar indexed into
105/// `SyntaxSet`), tree-sitter (a `fresh_languages::Language`), or both.
106#[derive(Clone, Debug, Default)]
107pub struct GrammarEngines {
108    /// Index into `GrammarRegistry::syntax_set().syntaxes()`, if a syntect
109    /// grammar is available.
110    pub syntect: Option<usize>,
111    /// Tree-sitter language, if one is registered for this grammar.
112    pub tree_sitter: Option<fresh_languages::Language>,
113}
114
115/// A single entry in the unified grammar catalog.
116///
117/// Each entry represents one logical language (e.g. "Rust", "TypeScript") and
118/// records which highlighting engines can serve it, plus the names/extensions
119/// used to look it up. The catalog is the single source of truth for grammar
120/// lookups — `find_by_name`, `find_by_path`, `find_by_extension` all return
121/// entries from here, and both `HighlightEngine::from_entry` and
122/// `DetectedLanguage::from_entry` consume them.
123#[derive(Clone, Debug)]
124pub struct GrammarEntry {
125    /// Human-readable display name (e.g. "TypeScript", "Bourne Again Shell (bash)").
126    pub display_name: String,
127    /// Canonical language ID used in config and LSP (e.g. "typescript", "csharp").
128    pub language_id: String,
129    /// Short alias, if one exists (e.g. "ts" for TypeScript).
130    pub short_name: Option<String>,
131    /// File extensions (without leading dot).
132    pub extensions: Vec<String>,
133    /// Exact filenames that map to this grammar (e.g. "Dockerfile").
134    pub filenames: Vec<String>,
135    /// Filename globs from user config (e.g. "*.conf", "/etc/**/rc.*").
136    pub filename_globs: Vec<String>,
137    /// Where this grammar was loaded from.
138    pub source: GrammarSource,
139    /// Highlighters that can serve this entry.
140    pub engines: GrammarEngines,
141}
142
// Embedded `.sublime-syntax` grammar sources. Each constant holds the text of
// one grammar file, pulled in at compile time with `include_str!`.

/// Embedded TOML grammar (syntect doesn't include one)
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar (syntect doesn't include one)
/// From: https://github.com/Tetralux/sublime-odin (MIT License)
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar (syntect doesn't include one)
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded GDScript grammar
/// Based on https://github.com/beefsack/GDScript-sublime (MIT License)
pub const GDSCRIPT_GRAMMAR: &str = include_str!("../../grammars/gdscript.sublime-syntax");

/// Embedded Git Rebase Todo grammar for interactive rebase
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded Git Commit Message grammar for COMMIT_EDITMSG, MERGE_MSG, etc.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded Gitignore grammar for .gitignore and similar files
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded Git Config grammar for .gitconfig, .gitmodules
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded Git Attributes grammar for .gitattributes
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar (syntect doesn't include one)
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Embedded Dockerfile grammar
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar (also handles .env, .cfg, .editorconfig, etc.)
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL/Terraform grammar
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V language grammar
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Smali grammar
pub const SMALI_GRAMMAR: &str = include_str!("../../grammars/smali.sublime-syntax");
/// Embedded Fish shell grammar
pub const FISH_GRAMMAR: &str = include_str!("../../grammars/fish.sublime-syntax");
/// Embedded Starlark/Bazel grammar
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go Module grammar
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar (Hyprland config)
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar
/// From: https://github.com/SALZKARTOFFEEEL/ahk-sublime-syntax (MIT License)
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
/// Embedded Racket grammar (syntect doesn't include one)
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
/// Embedded Verilog grammar (HDL)
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
/// Embedded SystemVerilog grammar (HDL)
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
/// Embedded VHDL grammar (HDL)
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");

/// Embedded C3 grammar
pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");

/// Embedded Assembly grammar (GAS/AT&T and Intel/NASM dialects; syntect
/// doesn't include one)
pub const ASM_GRAMMAR: &str = include_str!("../../grammars/asm.sublime-syntax");
257
258/// Registry of all available TextMate grammars.
259///
260/// This struct holds the compiled syntax set and provides lookup methods.
261/// It does not perform I/O directly - use `GrammarLoader` for loading grammars.
262impl std::fmt::Debug for GrammarRegistry {
263    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
264        f.debug_struct("GrammarRegistry")
265            .field("syntax_count", &self.syntax_set.syntaxes().len())
266            .finish()
267    }
268}
269
270pub struct GrammarRegistry {
271    /// Combined syntax set (built-in + embedded + user grammars)
272    syntax_set: Arc<SyntaxSet>,
273    /// Extension -> scope name mapping for user grammars (takes priority)
274    user_extensions: HashMap<String, String>,
275    /// Filename -> scope name mapping for dotfiles and special files
276    filename_scopes: HashMap<String, String>,
277    /// Paths to dynamically loaded grammar files (for reloading when adding more)
278    loaded_grammar_paths: Vec<GrammarSpec>,
279    /// Provenance info for each grammar (keyed by grammar name)
280    grammar_sources: HashMap<String, GrammarInfo>,
281    /// Short name aliases: lowercase short_name -> full syntect grammar name.
282    /// Provides a deterministic, one-to-one mapping so users can write
283    /// `grammar = "bash"` instead of `grammar = "Bourne Again Shell (bash)"`.
284    aliases: HashMap<String, String>,
285    /// Unified catalog of every known grammar. Rebuilt whenever the syntax set
286    /// or alias table changes. Lookups (`find_by_name`, `find_by_path`, ...)
287    /// all resolve against this.
288    catalog: Vec<GrammarEntry>,
289    /// Index from lowercased lookup keys (display name, language_id, short_name)
290    /// to catalog index.
291    catalog_by_name: HashMap<String, usize>,
292    /// Index from file extension (without dot) to catalog index.
293    catalog_by_extension: HashMap<String, usize>,
294    /// Index from filename to catalog index.
295    catalog_by_filename: HashMap<String, usize>,
296    /// The most recent language config handed to `apply_language_config`.
297    /// Retained so `rebuild_catalog` can replay it — otherwise a rebuild
298    /// (triggered by e.g. `populate_built_in_aliases`) silently wipes user
299    /// `[languages]` config that was merged on top.
300    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
301    /// Monotonic generation, bumped on every catalog mutation. Lets
302    /// observers (plugin state snapshot) detect changes with one integer
303    /// compare instead of recounting entries.
304    catalog_gen: u64,
305}
306
307impl GrammarRegistry {
308    /// Create a new GrammarRegistry from pre-built components.
309    ///
310    /// This is typically called by `GrammarLoader` implementations after
311    /// loading grammars from various sources.
312    pub(crate) fn new(
313        syntax_set: SyntaxSet,
314        user_extensions: HashMap<String, String>,
315        filename_scopes: HashMap<String, String>,
316    ) -> Self {
317        Self::new_with_loaded_paths(
318            syntax_set,
319            user_extensions,
320            filename_scopes,
321            Vec::new(),
322            HashMap::new(),
323        )
324    }
325
326    /// Create a GrammarRegistry with pre-loaded grammar path tracking.
327    ///
328    /// Used by the loader when plugin grammars were included in the initial build,
329    /// so that `loaded_grammar_paths()` reflects what was actually loaded.
330    pub(crate) fn new_with_loaded_paths(
331        syntax_set: SyntaxSet,
332        user_extensions: HashMap<String, String>,
333        filename_scopes: HashMap<String, String>,
334        loaded_grammar_paths: Vec<GrammarSpec>,
335        grammar_sources: HashMap<String, GrammarInfo>,
336    ) -> Self {
337        let mut reg = Self {
338            syntax_set: Arc::new(syntax_set),
339            user_extensions,
340            filename_scopes,
341            loaded_grammar_paths,
342            grammar_sources,
343            aliases: HashMap::new(),
344            catalog: Vec::new(),
345            catalog_by_name: HashMap::new(),
346            catalog_by_extension: HashMap::new(),
347            catalog_by_filename: HashMap::new(),
348            applied_language_config: HashMap::new(),
349            catalog_gen: 0,
350        };
351        reg.rebuild_catalog();
352        reg
353    }
354
355    /// Create an empty grammar registry (fast, for tests that don't need syntax highlighting)
356    pub fn empty() -> Arc<Self> {
357        let mut builder = SyntaxSetBuilder::new();
358        builder.add_plain_text_syntax();
359        let mut reg = Self {
360            syntax_set: Arc::new(builder.build()),
361            user_extensions: HashMap::new(),
362            filename_scopes: HashMap::new(),
363            loaded_grammar_paths: Vec::new(),
364            grammar_sources: HashMap::new(),
365            aliases: HashMap::new(),
366            catalog: Vec::new(),
367            catalog_by_name: HashMap::new(),
368            catalog_by_extension: HashMap::new(),
369            catalog_by_filename: HashMap::new(),
370            applied_language_config: HashMap::new(),
371            catalog_gen: 0,
372        };
373        reg.rebuild_catalog();
374        Arc::new(reg)
375    }
376
377    /// Create a registry with only syntect's pre-compiled defaults (~0ms).
378    ///
379    /// This provides instant syntax highlighting for ~50 common languages
380    /// (Rust, Python, JS/TS, C/C++, Go, Java, HTML, CSS, Markdown, etc.)
381    /// without any `SyntaxSetBuilder::build()` call. Use this at startup,
382    /// then swap in a full registry built on a background thread.
383    pub fn defaults_only() -> Arc<Self> {
384        // Load pre-compiled syntax set (defaults + embedded grammars) from
385        // build-time packdump. This avoids the expensive into_builder() + build()
386        // cycle at runtime (~12s → ~300ms).
387        tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
388        let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
389            concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
390        ))
391        .expect("Failed to load pre-compiled syntax packdump");
392        tracing::info!(
393            "defaults_only: loaded ({} syntaxes)",
394            syntax_set.syntaxes().len()
395        );
396        let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
397        let filename_scopes = Self::build_filename_scopes();
398        let extra_extensions = Self::build_extra_extensions();
399        let mut registry = Self {
400            syntax_set: Arc::new(syntax_set),
401            user_extensions: extra_extensions,
402            filename_scopes,
403            loaded_grammar_paths: Vec::new(),
404            grammar_sources,
405            aliases: HashMap::new(),
406            catalog: Vec::new(),
407            catalog_by_name: HashMap::new(),
408            catalog_by_extension: HashMap::new(),
409            catalog_by_filename: HashMap::new(),
410            applied_language_config: HashMap::new(),
411            catalog_gen: 0,
412        };
413        registry.populate_built_in_aliases();
414        registry.rebuild_catalog();
415        Arc::new(registry)
416    }
417
418    /// Build extra extension -> scope mappings for extensions not covered by syntect defaults.
419    ///
420    /// These map common file extensions to existing syntect grammar scopes,
421    /// filling gaps where syntect's built-in extension lists are incomplete.
422    pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
423        let mut map = HashMap::new();
424
425        // JavaScript variants not in syntect defaults (["js", "htc"])
426        let js_scope = "source.js".to_string();
427        map.insert("cjs".to_string(), js_scope.clone());
428        map.insert("mjs".to_string(), js_scope);
429
430        // Dockerfile variants (e.g. Dockerfile.dev -> .dev extension)
431        // These won't match by extension, handled by filename_scopes and first_line_match
432
433        map
434    }
435
436    /// Build the default filename -> scope mappings for dotfiles and special files.
437    pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
438        let mut map = HashMap::new();
439
440        // Shell configuration files -> Bash/Shell script scope
441        let shell_scope = "source.shell.bash".to_string();
442        for filename in [
443            ".zshrc",
444            ".zprofile",
445            ".zshenv",
446            ".zlogin",
447            ".zlogout",
448            ".bash_aliases",
449            // .bashrc and .bash_profile are already recognized by syntect
450            // Common shell script files without extensions
451            "PKGBUILD",
452            "APKBUILD",
453        ] {
454            map.insert(filename.to_string(), shell_scope.clone());
455        }
456
457        // Git rebase todo files
458        let git_rebase_scope = "source.git-rebase-todo".to_string();
459        map.insert("git-rebase-todo".to_string(), git_rebase_scope);
460
461        // Git commit message files
462        let git_commit_scope = "source.git-commit".to_string();
463        for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
464            map.insert(filename.to_string(), git_commit_scope.clone());
465        }
466
467        // Gitignore and similar files
468        let gitignore_scope = "source.gitignore".to_string();
469        for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
470            map.insert(filename.to_string(), gitignore_scope.clone());
471        }
472
473        // Git config files
474        let gitconfig_scope = "source.gitconfig".to_string();
475        for filename in [".gitconfig", ".gitmodules"] {
476            map.insert(filename.to_string(), gitconfig_scope.clone());
477        }
478
479        // Git attributes files
480        let gitattributes_scope = "source.gitattributes".to_string();
481        map.insert(".gitattributes".to_string(), gitattributes_scope);
482
483        // Jenkinsfile -> Groovy
484        let groovy_scope = "source.groovy".to_string();
485        map.insert("Jenkinsfile".to_string(), groovy_scope);
486
487        // Vagrantfile -> Ruby (syntect already handles this, but be explicit)
488        // Brewfile -> Ruby
489        let ruby_scope = "source.ruby".to_string();
490        map.insert("Brewfile".to_string(), ruby_scope);
491
492        // Dockerfile and variants (exact names; Dockerfile.* handled via prefix check)
493        let dockerfile_scope = "source.dockerfile".to_string();
494        map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
495        map.insert("Containerfile".to_string(), dockerfile_scope.clone());
496        // Common Dockerfile variants
497        map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
498        map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
499        map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
500        map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
501
502        // CMake
503        let cmake_scope = "source.cmake".to_string();
504        map.insert("CMakeLists.txt".to_string(), cmake_scope);
505
506        // Starlark/Bazel
507        let starlark_scope = "source.starlark".to_string();
508        map.insert("BUILD".to_string(), starlark_scope.clone());
509        map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
510        map.insert("WORKSPACE".to_string(), starlark_scope.clone());
511        map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
512        map.insert("Tiltfile".to_string(), starlark_scope);
513
514        // Justfile (various casings)
515        let justfile_scope = "source.justfile".to_string();
516        map.insert("justfile".to_string(), justfile_scope.clone());
517        map.insert("Justfile".to_string(), justfile_scope.clone());
518        map.insert(".justfile".to_string(), justfile_scope);
519
520        // EditorConfig -> INI
521        let ini_scope = "source.ini".to_string();
522        map.insert(".editorconfig".to_string(), ini_scope);
523
524        // Earthfile
525        let earthfile_scope = "source.earthfile".to_string();
526        map.insert("Earthfile".to_string(), earthfile_scope);
527
528        // Hyprlang (Hyprland config files)
529        let hyprlang_scope = "source.hyprlang".to_string();
530        map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
531        map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
532        map.insert("hyprlock.conf".to_string(), hyprlang_scope);
533
534        // go.mod / go.sum
535        let gomod_scope = "source.gomod".to_string();
536        map.insert("go.mod".to_string(), gomod_scope.clone());
537        map.insert("go.sum".to_string(), gomod_scope);
538
539        // YAML(-ish) files without a .yaml/.yml extension (#2326). These are
540        // formats whose content is unambiguously YAML:
541        //   - yarn v1 lockfiles use a YAML-compatible format.
542        //   - .clang-format / _clang-format and .clang-tidy are YAML (LLVM).
543        //   - .yamllint is yamllint's own YAML config.
544        //   - Podfile.lock (CocoaPods) and pubspec.lock (Dart pub) are YAML.
545        let yaml_scope = "source.yaml".to_string();
546        for filename in [
547            "yarn.lock",
548            ".clang-format",
549            "_clang-format",
550            ".clang-tidy",
551            ".yamllint",
552            "Podfile.lock",
553            "pubspec.lock",
554        ] {
555            map.insert(filename.to_string(), yaml_scope.clone());
556        }
557
558        // Lock files whose content is TOML. Cargo.lock is also matched by
559        // syntect's first-line regex, but mapping it explicitly is robust
560        // regardless of the file's first line.
561        let toml_scope = "source.toml".to_string();
562        for filename in ["Cargo.lock", "poetry.lock", "uv.lock"] {
563            map.insert(filename.to_string(), toml_scope.clone());
564        }
565
566        // Lock files whose content is JSON. The JSON grammar has no first-line
567        // regex for a leading `{`, so these need an explicit filename mapping.
568        let json_scope = "source.json".to_string();
569        for filename in ["composer.lock", "Pipfile.lock", "flake.lock", "deno.lock"] {
570            map.insert(filename.to_string(), json_scope.clone());
571        }
572
573        map
574    }
575
576    /// Add embedded grammars (TOML, Odin, etc.) to a syntax set builder.
577    pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
578        // TOML grammar
579        match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
580            Ok(syntax) => {
581                builder.add(syntax);
582                tracing::debug!("Loaded embedded TOML grammar");
583            }
584            Err(e) => {
585                tracing::warn!("Failed to load embedded TOML grammar: {}", e);
586            }
587        }
588
589        // Odin grammar
590        match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
591            Ok(syntax) => {
592                builder.add(syntax);
593                tracing::debug!("Loaded embedded Odin grammar");
594            }
595            Err(e) => {
596                tracing::warn!("Failed to load embedded Odin grammar: {}", e);
597            }
598        }
599
600        // Zig grammar
601        match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
602            Ok(syntax) => {
603                builder.add(syntax);
604                tracing::debug!("Loaded embedded Zig grammar");
605            }
606            Err(e) => {
607                tracing::warn!("Failed to load embedded Zig grammar: {}", e);
608            }
609        }
610
611        // GDScript grammar
612        match SyntaxDefinition::load_from_str(GDSCRIPT_GRAMMAR, true, Some("GDScript")) {
613            Ok(syntax) => {
614                builder.add(syntax);
615                tracing::debug!("Loaded embedded GDScript grammar");
616            }
617            Err(e) => {
618                tracing::warn!("Failed to load embedded GDScript grammar: {}", e);
619            }
620        }
621
622        // Git Rebase Todo grammar
623        match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
624            Ok(syntax) => {
625                builder.add(syntax);
626                tracing::debug!("Loaded embedded Git Rebase Todo grammar");
627            }
628            Err(e) => {
629                tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
630            }
631        }
632
633        // Git Commit Message grammar
634        match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
635        {
636            Ok(syntax) => {
637                builder.add(syntax);
638                tracing::debug!("Loaded embedded Git Commit Message grammar");
639            }
640            Err(e) => {
641                tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
642            }
643        }
644
645        // Gitignore grammar
646        match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
647            Ok(syntax) => {
648                builder.add(syntax);
649                tracing::debug!("Loaded embedded Gitignore grammar");
650            }
651            Err(e) => {
652                tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
653            }
654        }
655
656        // Git Config grammar
657        match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
658            Ok(syntax) => {
659                builder.add(syntax);
660                tracing::debug!("Loaded embedded Git Config grammar");
661            }
662            Err(e) => {
663                tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
664            }
665        }
666
667        // Git Attributes grammar
668        match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
669            Ok(syntax) => {
670                builder.add(syntax);
671                tracing::debug!("Loaded embedded Git Attributes grammar");
672            }
673            Err(e) => {
674                tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
675            }
676        }
677
678        // Typst grammar
679        match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
680            Ok(syntax) => {
681                builder.add(syntax);
682                tracing::debug!("Loaded embedded Typst grammar");
683            }
684            Err(e) => {
685                tracing::warn!("Failed to load embedded Typst grammar: {}", e);
686            }
687        }
688
689        // Additional embedded grammars for languages not in syntect defaults
690        let additional_grammars: &[(&str, &str)] = &[
691            (DOCKERFILE_GRAMMAR, "Dockerfile"),
692            (INI_GRAMMAR, "INI"),
693            (CMAKE_GRAMMAR, "CMake"),
694            (SCSS_GRAMMAR, "SCSS"),
695            (LESS_GRAMMAR, "LESS"),
696            (POWERSHELL_GRAMMAR, "PowerShell"),
697            (KOTLIN_GRAMMAR, "Kotlin"),
698            (SWIFT_GRAMMAR, "Swift"),
699            (DART_GRAMMAR, "Dart"),
700            (ELIXIR_GRAMMAR, "Elixir"),
701            (FSHARP_GRAMMAR, "FSharp"),
702            (NIX_GRAMMAR, "Nix"),
703            (HCL_GRAMMAR, "HCL"),
704            (PROTOBUF_GRAMMAR, "Protocol Buffers"),
705            (GRAPHQL_GRAMMAR, "GraphQL"),
706            (JULIA_GRAMMAR, "Julia"),
707            (NIM_GRAMMAR, "Nim"),
708            (GLEAM_GRAMMAR, "Gleam"),
709            (VLANG_GRAMMAR, "V"),
710            (SOLIDITY_GRAMMAR, "Solidity"),
711            (KDL_GRAMMAR, "KDL"),
712            (NUSHELL_GRAMMAR, "Nushell"),
713            (SMALI_GRAMMAR, "Smali"),
714            (FISH_GRAMMAR, "Fish"),
715            (STARLARK_GRAMMAR, "Starlark"),
716            (JUSTFILE_GRAMMAR, "Justfile"),
717            (EARTHFILE_GRAMMAR, "Earthfile"),
718            (GOMOD_GRAMMAR, "Go Module"),
719            (VUE_GRAMMAR, "Vue"),
720            (SVELTE_GRAMMAR, "Svelte"),
721            (ASTRO_GRAMMAR, "Astro"),
722            (HYPRLANG_GRAMMAR, "Hyprlang"),
723            (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
724            (RACKET_GRAMMAR, "Racket"),
725            (VERILOG_GRAMMAR, "Verilog"),
726            (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
727            (VHDL_GRAMMAR, "VHDL"),
728            (C3_GRAMMAR, "C3"),
729            (ASM_GRAMMAR, "Assembly"),
730        ];
731
732        for (grammar_str, name) in additional_grammars {
733            match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
734                Ok(syntax) => {
735                    builder.add(syntax);
736                    tracing::debug!("Loaded embedded {} grammar", name);
737                }
738                Err(e) => {
739                    tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
740                }
741            }
742        }
743    }
744
745    /// Find syntax for a file by path/extension/filename.
746    ///
747    /// Purely metadata-based — does not read the file. For first-line
748    /// (shebang) fallback, use [`find_by_path`] with a `first_line` argument
749    /// and resolve the returned entry's syntect index.
750    pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
751        let entry = self.find_by_path(path, None)?;
752        entry
753            .engines
754            .syntect
755            .map(|i| &self.syntax_set.syntaxes()[i])
756    }
757
758    /// Find syntax by name, with alias resolution.
759    ///
760    /// Thin wrapper around `find_by_name` that returns the associated syntect
761    /// `SyntaxReference`. Tree-sitter-only entries return `None`.
762    ///
763    /// Falls back to a direct syntect lookup for "Plain Text", which the
764    /// catalog deliberately omits but syntect still exposes.
765    pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
766        if let Some(entry) = self.find_by_name(name) {
767            if let Some(idx) = entry.engines.syntect {
768                return Some(&self.syntax_set.syntaxes()[idx]);
769            }
770        }
771        // Plain Text is excluded from the catalog (it's not a "grammar" a user
772        // would ever pick), but syntect still stores it and a handful of
773        // callers still ask for it by name.
774        self.syntax_set.find_syntax_by_name(name)
775    }
776
777    // === Alias management ===
778
/// Built-in table of short-name aliases for bundled grammars.
///
/// Each pair is `(alias, grammar name)`. Aliases are written in lowercase;
/// the grammar name is spelled exactly as the grammar is named. Only grammars
/// whose full name differs noticeably from a natural short form need a row —
/// names that are already short (e.g. "Rust", "Go") are reachable through
/// case-insensitive name matching.
fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
    // Targets that appear in more than one row.
    const BASH: &str = "Bourne Again Shell (bash)";
    const CPP: &str = "C++";
    const REGEX: &str = "Regular Expressions (Python)";
    const PROTOBUF: &str = "Protocol Buffers";
    const FSHARP: &str = "FSharp";
    const HCL: &str = "HCL";

    const TABLE: &[(&str, &str)] = &[
        // Syntect built-in grammars with verbose names.
        ("bash", BASH),
        ("shell", BASH),
        ("sh", BASH),
        ("c++", CPP),
        ("cpp", CPP),
        ("csharp", "C#"),
        ("objc", "Objective-C"),
        ("objcpp", "Objective-C++"),
        ("regex", REGEX),
        ("regexp", REGEX),
        // Embedded grammars with multi-word or non-obvious names.
        ("proto", PROTOBUF),
        ("protobuf", PROTOBUF),
        ("gomod", "Go Module"),
        ("git-rebase", "Git Rebase Todo"),
        ("git-commit", "Git Commit Message"),
        ("git-config", "Git Config"),
        ("git-attributes", "Git Attributes"),
        ("gitignore", "Gitignore"),
        ("fsharp", FSHARP),
        ("f#", FSHARP),
        ("terraform", HCL),
        ("tf", HCL),
        ("ts", "TypeScript"),
        ("js", "JavaScript"),
        ("py", "Python"),
        ("rb", "Ruby"),
        ("rs", "Rust"),
        ("md", "Markdown"),
        ("yml", "YAML"),
        ("dockerfile", "Dockerfile"),
    ];

    TABLE.to_vec()
}
821
822    /// Populate aliases from the built-in table.
823    ///
824    /// Validates that:
825    /// - Each alias target (full name) exists in the syntax set
826    /// - No alias collides (case-insensitive) with an existing grammar full name
827    /// - No duplicate aliases exist
828    pub(crate) fn populate_built_in_aliases(&mut self) {
829        for (short, full) in Self::built_in_aliases() {
830            self.register_alias_inner(short, full, true);
831        }
832        self.rebuild_catalog();
833    }
834
835    /// Register a short-name alias for a grammar.
836    ///
837    /// Returns `true` if the alias was registered, `false` if rejected due to
838    /// collision or missing target. For built-in aliases, collisions panic
839    /// (they indicate a bug). For dynamic aliases, collisions log a warning.
840    ///
841    /// Splices the alias directly into the catalog rather than rebuilding, so
842    /// any user config previously merged via `apply_language_config` is
843    /// preserved. A full rebuild would wipe those entries.
844    pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
845        if !self.register_alias_inner(short_name, full_name, false) {
846            return false;
847        }
848        let short_lower = short_name.to_lowercase();
849        let full_lower = full_name.to_lowercase();
850        if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
851            self.catalog_by_name
852                .entry(short_lower.clone())
853                .or_insert(idx);
854            let entry = &mut self.catalog[idx];
855            let replace = match &entry.short_name {
856                None => true,
857                Some(existing) => short_name.len() < existing.len(),
858            };
859            if replace {
860                entry.short_name = Some(short_lower);
861            }
862        }
863        true
864    }
865
866    fn register_alias_inner(
867        &mut self,
868        short_name: &str,
869        full_name: &str,
870        is_built_in: bool,
871    ) -> bool {
872        let short_lower = short_name.to_lowercase();
873
874        // Validate: target grammar must exist in the syntax set
875        let target_exists = self
876            .syntax_set
877            .syntaxes()
878            .iter()
879            .any(|s| s.name.eq_ignore_ascii_case(full_name));
880        if !target_exists {
881            // Tree-sitter-only targets (e.g. TypeScript) are expected to be
882            // absent from the syntect set. `rebuild_catalog` attaches their
883            // short names via a separate pass over `built_in_aliases()`.
884            if tree_sitter_for_syntect_name(full_name).is_some() {
885                return false;
886            }
887            if is_built_in {
888                // Built-in alias targets should always exist; warn but don't panic
889                // (grammar might have been removed from syntect upstream)
890                tracing::warn!(
891                    "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
892                    short_name, full_name
893                );
894            } else {
895                tracing::warn!(
896                    "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
897                    short_name,
898                    full_name
899                );
900            }
901            return false;
902        }
903
904        // Validate: short name must not collide (case-insensitive) with any grammar full name
905        let collides_with_full_name = self
906            .syntax_set
907            .syntaxes()
908            .iter()
909            .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
910        if collides_with_full_name {
911            // This is actually fine — the short name matches a full name directly,
912            // so find_syntax_by_name's case-insensitive search will find it.
913            // No alias needed.
914            tracing::debug!(
915                "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
916                short_name
917            );
918            return false;
919        }
920
921        // Validate: no duplicate alias (case-insensitive)
922        if let Some(existing_target) = self.aliases.get(&short_lower) {
923            if existing_target.eq_ignore_ascii_case(full_name) {
924                // Same mapping, no-op
925                return true;
926            }
927            let msg = format!(
928                "Alias '{}' already maps to '{}', cannot remap to '{}'",
929                short_name, existing_target, full_name
930            );
931            if is_built_in {
932                panic!("[grammar-alias] Built-in alias collision: {}", msg);
933            } else {
934                tracing::warn!("[grammar-alias] {}", msg);
935                return false;
936            }
937        }
938
939        // Resolve the exact syntect name (preserving original case)
940        let exact_name = self
941            .syntax_set
942            .syntaxes()
943            .iter()
944            .find(|s| s.name.eq_ignore_ascii_case(full_name))
945            .map(|s| s.name.clone())
946            .unwrap();
947
948        self.aliases.insert(short_lower, exact_name);
949        true
950    }
951
952    // === Unified catalog ===
953
954    /// Rebuild the flat catalog of grammar entries.
955    ///
956    /// Called after the syntax set, aliases, or filename scopes change.
957    /// Produces one entry per logical language by merging:
958    /// 1. Every `SyntaxReference` in the syntax set (except "Plain Text")
959    /// 2. Every `fresh_languages::Language` not already covered by a syntect entry
960    /// 3. Alias short-names attached to their target entry
961    /// 4. Filename mappings from `filename_scopes` attached to their scope's entry
962    /// 5. Extra extensions from `user_extensions` attached to their scope's entry
963    ///
964    /// Automatically replays the last `apply_language_config` at the end, so
965    /// user `[languages]` config survives any rebuild.
966    pub(crate) fn rebuild_catalog(&mut self) {
967        // Reverse-map: full_name (lowercase) -> shortest alias.
968        //
969        // Seed from the built-in alias table as well as the live `aliases`
970        // HashMap: the live map only contains aliases whose target exists in
971        // the syntect set, so tree-sitter-only entries (TypeScript) would
972        // otherwise never get their short name ("ts").
973        let mut short_by_full: HashMap<String, String> = HashMap::new();
974        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
975            let key = full.to_lowercase();
976            let keep = match map.get(&key) {
977                None => true,
978                Some(existing) => short.len() < existing.len(),
979            };
980            if keep {
981                map.insert(key, short.to_string());
982            }
983        };
984        for (short, full) in Self::built_in_aliases() {
985            record(&mut short_by_full, short, full);
986        }
987        for (short, full) in &self.aliases {
988            record(&mut short_by_full, short, full);
989        }
990
991        let derive_language_id =
992            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
993                let ts = tree_sitter_for_syntect_name(display_name);
994                let id = ts
995                    .map(|l| l.id().to_string())
996                    .unwrap_or_else(|| display_name.to_lowercase());
997                (id, ts)
998            };
999
1000        let mut catalog: Vec<GrammarEntry> = Vec::new();
1001        let mut scope_to_index: HashMap<String, usize> = HashMap::new();
1002
1003        // Syntect-backed entries (skip Plain Text and JavaScript).
1004        //
1005        // Syntect's `file_extensions` is a hybrid list: real extensions like
1006        // "rb" sit alongside bare filenames like "Gemfile", "Rakefile",
1007        // "Makefile". Syntect's own `find_syntax_for_file` tries each entry
1008        // against the whole filename AND against the path's extension, and
1009        // the catalog has to preserve that semantics. We keep everything in
1010        // `extensions` here and index each entry as *both* an extension and
1011        // a filename at the bottom of this method.
1012        //
1013        // JavaScript is skipped here so the catalog falls through to the
1014        // tree-sitter-only fallback below — the bundled syntect JS grammar
1015        // mishandles class fields whose initialiser is an arrow function
1016        // returning a template literal (issue #899: state leaks past the
1017        // closing backtick and paints the rest of the file as a string).
1018        // tree-sitter-javascript parses template literals from the AST and
1019        // does not have this failure mode. `find_syntax_by_name("JavaScript")`
1020        // still returns syntect's grammar via the catalog's fallback path,
1021        // so markdown popup rendering and other code-string highlighters
1022        // are unaffected.
1023        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
1024            if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
1025                continue;
1026            }
1027            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
1028            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
1029            let source = self
1030                .grammar_sources
1031                .get(&syntax.name)
1032                .map(|info| info.source.clone())
1033                .unwrap_or(GrammarSource::BuiltIn);
1034            let entry_index = catalog.len();
1035            scope_to_index.insert(syntax.scope.to_string(), entry_index);
1036
1037            // Union syntect's file_extensions with tree-sitter's own
1038            // extension list when the entry carries both engines.
1039            // tree-sitter-javascript handles `.jsx`/`.mjs`/`.cjs` that
1040            // syntect's JS grammar doesn't list, and the old code used to
1041            // route those paths to tree-sitter via a separate lookup.
1042            let mut extensions = syntax.file_extensions.clone();
1043            if let Some(lang) = tree_sitter {
1044                for ext in lang.extensions() {
1045                    let ext = ext.to_string();
1046                    if !extensions.iter().any(|e| e == &ext) {
1047                        extensions.push(ext);
1048                    }
1049                }
1050            }
1051
1052            // Only Fresh's dedicated Fish grammar may own `.fish`. Syntect's
1053            // stock Bash grammar also advertises the extension (a quirk of its
1054            // upstream definition), which would otherwise shadow Fish in the
1055            // first-wins extension index since Bash precedes it in the packdump.
1056            if syntax.name != "Fish" {
1057                extensions.retain(|e| e != "fish");
1058            }
1059
1060            catalog.push(GrammarEntry {
1061                display_name: syntax.name.clone(),
1062                language_id,
1063                short_name,
1064                extensions,
1065                filenames: Vec::new(),
1066                filename_globs: Vec::new(),
1067                source,
1068                engines: GrammarEngines {
1069                    syntect: Some(idx),
1070                    tree_sitter,
1071                },
1072            });
1073        }
1074
1075        // Attach filename_scopes to their entries.
1076        for (filename, scope) in &self.filename_scopes {
1077            if let Some(&idx) = scope_to_index.get(scope) {
1078                if !catalog[idx].filenames.iter().any(|f| f == filename) {
1079                    catalog[idx].filenames.push(filename.clone());
1080                }
1081            }
1082        }
1083
1084        // Attach user_extensions (extra → scope) to their entries.
1085        for (ext, scope) in &self.user_extensions {
1086            if let Some(&idx) = scope_to_index.get(scope) {
1087                if !catalog[idx].extensions.iter().any(|e| e == ext) {
1088                    catalog[idx].extensions.push(ext.clone());
1089                }
1090            }
1091        }
1092
1093        // Ensure every tree-sitter language has an entry. If a syntect entry
1094        // already maps to the same tree-sitter language, skip it; otherwise
1095        // add a tree-sitter-only entry so the catalog is complete (TypeScript
1096        // being the motivating example — syntect ships no grammar for it).
1097        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1098            std::collections::HashSet::new();
1099        for entry in &catalog {
1100            if let Some(lang) = entry.engines.tree_sitter {
1101                ts_covered.insert(lang);
1102            }
1103        }
1104        for lang in fresh_languages::Language::all() {
1105            if ts_covered.contains(lang) {
1106                continue;
1107            }
1108            let display_name = lang.display_name().to_string();
1109            let language_id = lang.id().to_string();
1110            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1111            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1112            catalog.push(GrammarEntry {
1113                display_name,
1114                language_id,
1115                short_name,
1116                extensions,
1117                filenames: Vec::new(),
1118                filename_globs: Vec::new(),
1119                source: GrammarSource::BuiltIn,
1120                engines: GrammarEngines {
1121                    syntect: None,
1122                    tree_sitter: Some(*lang),
1123                },
1124            });
1125        }
1126
1127        // Build name / extension / filename indices.
1128        //
1129        // Every entry in `extensions` gets indexed in BOTH `by_extension`
1130        // (lowercased) AND `by_filename` (exact case) — syntect's
1131        // `file_extensions` list holds both real extensions ("rb") and bare
1132        // filenames ("Gemfile", "Rakefile", "Makefile"). Indexing both ways
1133        // matches syntect's own `find_syntax_for_file` semantics.
1134        let mut by_name: HashMap<String, usize> = HashMap::new();
1135        let mut by_extension: HashMap<String, usize> = HashMap::new();
1136        let mut by_filename: HashMap<String, usize> = HashMap::new();
1137        for (idx, entry) in catalog.iter().enumerate() {
1138            by_name.insert(entry.display_name.to_lowercase(), idx);
1139            by_name.insert(entry.language_id.to_lowercase(), idx);
1140            if let Some(short) = &entry.short_name {
1141                by_name.insert(short.to_lowercase(), idx);
1142            }
1143            for ext in &entry.extensions {
1144                by_extension.entry(ext.to_lowercase()).or_insert(idx);
1145                by_filename.entry(ext.clone()).or_insert(idx);
1146            }
1147            for filename in &entry.filenames {
1148                by_filename.entry(filename.clone()).or_insert(idx);
1149            }
1150        }
1151
1152        self.catalog = catalog;
1153        self.catalog_by_name = by_name;
1154        self.catalog_by_extension = by_extension;
1155        self.catalog_by_filename = by_filename;
1156
1157        // Replay the most recent user config so a rebuild doesn't silently
1158        // wipe out user `[languages]` rules. `take` + restore avoids both a
1159        // clone and a borrow checker fight with `apply_language_config_inner`.
1160        if !self.applied_language_config.is_empty() {
1161            let cfg = std::mem::take(&mut self.applied_language_config);
1162            self.apply_language_config_inner(&cfg);
1163            self.applied_language_config = cfg;
1164        }
1165        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1166    }
1167
1168    /// Return the full catalog of grammar entries.
1169    pub fn catalog(&self) -> &[GrammarEntry] {
1170        &self.catalog
1171    }
1172
1173    /// Monotonic generation, bumped on every catalog mutation. Compare against
1174    /// a previously-observed value to decide whether to recompute derived
1175    /// state.
1176    pub fn catalog_gen(&self) -> u64 {
1177        self.catalog_gen
1178    }
1179
1180    /// Look up a grammar entry by display name, language ID, or short alias
1181    /// (case-insensitive). All aliases — built-in and user-config-declared —
1182    /// are indexed directly in `catalog_by_name` during `rebuild_catalog` /
1183    /// `register_alias` / `apply_language_config`, so a single lookup covers
1184    /// every case.
1185    pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1186        self.catalog_by_name
1187            .get(&name.to_lowercase())
1188            .map(|&idx| &self.catalog[idx])
1189    }
1190
1191    /// Look up a grammar entry by file path, with optional first-line content
1192    /// for shebang / `first_line_match` detection.
1193    ///
1194    /// Resolution order:
1195    /// 1. Exact filename (config-declared filenames and filename_scopes live here)
1196    /// 2. Glob patterns from user config (e.g. "*.conf", "/etc/**/rc.*")
1197    /// 3. File extension
1198    /// 4. Shebang / first-line regex match on `first_line` if supplied
1199    ///
1200    /// Globs take priority over extension so a user rule like `*.conf → bash`
1201    /// wins over any built-in extension match on `.conf`. The first-line
1202    /// fallback (#4) is last so catalog matches stay authoritative — syntect
1203    /// might otherwise misclassify `.fish` as bash via its first-line
1204    /// regexes.
1205    ///
1206    /// The first-line fallback is pure: it runs syntect's
1207    /// `find_syntax_by_first_line` regex cache against the caller-supplied
1208    /// string. The registry never touches the filesystem — the caller (who
1209    /// already loaded the buffer via the `FileSystem` trait) must extract
1210    /// the first line and pass it in.
1211    pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1212        let filename = path.file_name().and_then(|n| n.to_str());
1213        let path_str = path.to_str().unwrap_or("");
1214
1215        if let Some(name) = filename {
1216            if let Some(&idx) = self.catalog_by_filename.get(name) {
1217                return Some(&self.catalog[idx]);
1218            }
1219        }
1220
1221        // Glob walk — filenames with globs are rare so linear scan is fine.
1222        if let Some(name) = filename {
1223            for entry in &self.catalog {
1224                for pattern in &entry.filename_globs {
1225                    let matched = if is_path_pattern(pattern) {
1226                        path_glob_matches(pattern, path_str)
1227                    } else {
1228                        filename_glob_matches(pattern, name)
1229                    };
1230                    if matched {
1231                        return Some(entry);
1232                    }
1233                }
1234            }
1235        }
1236
1237        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1238            if let Some(entry) = self.find_by_extension(ext) {
1239                return Some(entry);
1240            }
1241        }
1242
1243        // Last resort: shebang / first-line regex match against the
1244        // caller-supplied content. Map the matched syntect grammar back to a
1245        // catalog entry by name — every syntect syntax has a catalog entry,
1246        // so this round-trip preserves tree-sitter attachment.
1247        let line = first_line?;
1248        let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1249        self.find_by_name(&syntax.name)
1250    }
1251
1252    /// Look up a grammar entry by file extension (case-insensitive, without dot).
1253    pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1254        self.catalog_by_extension
1255            .get(&ext.to_lowercase())
1256            .map(|&idx| &self.catalog[idx])
1257    }
1258
1259    /// Merge user `[languages]` config into the catalog.
1260    ///
1261    /// For each config entry, resolves its grammar to an existing catalog entry
1262    /// (by grammar name or by language id). Extensions are added and override
1263    /// the ext→entry index so config wins over built-in mappings. Filenames are
1264    /// split into exact matches (indexed) and globs (walked at lookup time).
1265    ///
1266    /// If no existing entry matches, a new engine-less entry is created so the
1267    /// language still appears in the palette.
1268    ///
1269    /// Idempotent. The config is cached on the registry so `rebuild_catalog`
1270    /// can replay it — callers don't need to re-apply after a rebuild.
1271    pub fn apply_language_config(
1272        &mut self,
1273        languages: &HashMap<String, crate::config::LanguageConfig>,
1274    ) {
1275        self.applied_language_config = languages.clone();
1276        self.apply_language_config_inner(languages);
1277        self.catalog_gen = self.catalog_gen.wrapping_add(1);
1278    }
1279
1280    /// Do the actual catalog splicing without touching
1281    /// `applied_language_config`. Called from `apply_language_config` (which
1282    /// records the input) and from `rebuild_catalog` (which replays the
1283    /// cached input after wiping the catalog).
1284    fn apply_language_config_inner(
1285        &mut self,
1286        languages: &HashMap<String, crate::config::LanguageConfig>,
1287    ) {
1288        for (lang_id, lang_cfg) in languages {
1289            let grammar_name = if lang_cfg.grammar.is_empty() {
1290                lang_id.as_str()
1291            } else {
1292                lang_cfg.grammar.as_str()
1293            };
1294
1295            // Resolve to an existing entry; fall back to creating one.
1296            let idx = self
1297                .catalog_by_name
1298                .get(&grammar_name.to_lowercase())
1299                .copied()
1300                .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1301                .unwrap_or_else(|| {
1302                    let idx = self.catalog.len();
1303                    self.catalog.push(GrammarEntry {
1304                        display_name: lang_id.clone(),
1305                        language_id: lang_id.clone(),
1306                        short_name: None,
1307                        extensions: Vec::new(),
1308                        filenames: Vec::new(),
1309                        filename_globs: Vec::new(),
1310                        source: GrammarSource::BuiltIn,
1311                        engines: GrammarEngines::default(),
1312                    });
1313                    idx
1314                });
1315
1316            // Always index the config key so `find_by_name("mylang")` resolves
1317            // even when `mylang` aliases an existing grammar (e.g.
1318            // `[languages.mylang] grammar = "Rust"`). `or_insert` preserves
1319            // any existing mapping — won't clobber the canonical entry.
1320            self.catalog_by_name
1321                .entry(lang_id.to_lowercase())
1322                .or_insert(idx);
1323
1324            for ext in &lang_cfg.extensions {
1325                if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1326                    self.catalog[idx].extensions.push(ext.clone());
1327                }
1328                // Config-declared extensions override any previous mapping.
1329                self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1330            }
1331            for filename in &lang_cfg.filenames {
1332                if is_glob_pattern(filename) {
1333                    if !self.catalog[idx]
1334                        .filename_globs
1335                        .iter()
1336                        .any(|f| f == filename)
1337                    {
1338                        self.catalog[idx].filename_globs.push(filename.clone());
1339                    }
1340                } else {
1341                    if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1342                        self.catalog[idx].filenames.push(filename.clone());
1343                    }
1344                    self.catalog_by_filename.insert(filename.clone(), idx);
1345                }
1346            }
1347        }
1348    }
1349
1350    /// Get the underlying syntax set
1351    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1352        &self.syntax_set
1353    }
1354
1355    /// Get a clone of the Arc for sharing
1356    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1357        Arc::clone(&self.syntax_set)
1358    }
1359
1360    /// List all available syntax names
1361    pub fn available_syntaxes(&self) -> Vec<&str> {
1362        self.syntax_set
1363            .syntaxes()
1364            .iter()
1365            .map(|s| s.name.as_str())
1366            .collect()
1367    }
1368
1369    /// List all available grammars with provenance information.
1370    ///
1371    /// Returns a sorted list of `GrammarInfo` entries derived from the unified
1372    /// catalog — this includes both syntect grammars and tree-sitter-only
1373    /// languages (like TypeScript). Each entry is listed exactly once even
1374    /// when both engines can serve it.
1375    pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1376        let mut result: Vec<GrammarInfo> = self
1377            .catalog
1378            .iter()
1379            .map(|entry| GrammarInfo {
1380                name: entry.display_name.clone(),
1381                source: entry.source.clone(),
1382                file_extensions: entry.extensions.clone(),
1383                short_name: entry.short_name.clone(),
1384            })
1385            .collect();
1386        result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1387        result
1388    }
1389
1390    /// Get the grammar sources map.
1391    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1392        &self.grammar_sources
1393    }
1394
1395    /// Build grammar source info from a pre-compiled syntax set.
1396    ///
1397    /// All grammars in the packdump (syntect defaults + embedded) are tagged as built-in.
1398    pub(crate) fn build_grammar_sources_from_syntax_set(
1399        syntax_set: &SyntaxSet,
1400    ) -> HashMap<String, GrammarInfo> {
1401        let mut sources = HashMap::new();
1402        for syntax in syntax_set.syntaxes() {
1403            sources.insert(
1404                syntax.name.clone(),
1405                GrammarInfo {
1406                    name: syntax.name.clone(),
1407                    source: GrammarSource::BuiltIn,
1408                    file_extensions: syntax.file_extensions.clone(),
1409                    short_name: None,
1410                },
1411            );
1412        }
1413        sources
1414    }
1415
1416    /// Get the user extensions mapping (extension -> scope name).
1417    #[cfg(test)]
1418    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1419        &self.user_extensions
1420    }
1421
1422    /// Get the loaded grammar paths (for deduplication in flush_pending_grammars).
1423    #[cfg(test)]
1424    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1425        &self.loaded_grammar_paths
1426    }
1427
1428    /// Create a new registry with additional grammar files
1429    ///
1430    /// This builds a new GrammarRegistry that includes all grammars from
1431    /// the base registry plus the additional grammars specified.
1432    /// Uses the base registry's syntax_set as the builder base, preserving
1433    /// all existing grammars (user grammars, language packs, etc.).
1434    ///
1435    /// # Arguments
1436    /// * `base` - The base registry to extend
1437    /// * `additional` - List of (language, path, extensions) tuples for new grammars
1438    ///
1439    /// # Returns
1440    /// A new GrammarRegistry with the additional grammars, or None if rebuilding fails
1441    pub fn with_additional_grammars(
1442        base: &GrammarRegistry,
1443        additional: &[GrammarSpec],
1444    ) -> Option<Self> {
1445        tracing::info!(
1446            "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1447            additional.len(),
1448            base.syntax_set.syntaxes().len()
1449        );
1450
1451        // Use the base registry's syntax_set as builder base — this preserves
1452        // ALL existing grammars (defaults, embedded, user, language packs)
1453        // without needing to reload them from disk.
1454        let mut builder = (*base.syntax_set).clone().into_builder();
1455
1456        // Preserve existing user extensions and add new ones
1457        let mut user_extensions = base.user_extensions.clone();
1458
1459        // Track loaded grammar paths (existing + new)
1460        let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1461
1462        // Preserve existing grammar sources
1463        let mut grammar_sources = base.grammar_sources.clone();
1464
1465        // Add each new grammar
1466        for spec in additional {
1467            tracing::info!(
1468                "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1469                spec.language,
1470                spec.path,
1471                spec.extensions
1472            );
1473            match Self::load_grammar_file(&spec.path) {
1474                Ok(syntax) => {
1475                    let scope = syntax.scope.to_string();
1476                    let syntax_name = syntax.name.clone();
1477                    tracing::info!(
1478                        "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1479                        syntax_name,
1480                        scope
1481                    );
1482                    builder.add(syntax);
1483                    tracing::info!(
1484                        "Loaded grammar for '{}' from {:?} with extensions {:?}",
1485                        spec.language,
1486                        spec.path,
1487                        spec.extensions
1488                    );
1489                    // Register extensions for this grammar
1490                    for ext in &spec.extensions {
1491                        user_extensions.insert(ext.clone(), scope.clone());
1492                    }
1493                    // Track provenance
1494                    grammar_sources.insert(
1495                        syntax_name.clone(),
1496                        GrammarInfo {
1497                            name: syntax_name,
1498                            source: GrammarSource::Plugin {
1499                                plugin: spec.language.clone(),
1500                                path: spec.path.clone(),
1501                            },
1502                            file_extensions: spec.extensions.clone(),
1503                            short_name: None,
1504                        },
1505                    );
1506                    // Track this grammar path for future reloads
1507                    loaded_grammar_paths.push(spec.clone());
1508                }
1509                Err(e) => {
1510                    tracing::warn!(
1511                        "Failed to load grammar for '{}' from {:?}: {}",
1512                        spec.language,
1513                        spec.path,
1514                        e
1515                    );
1516                }
1517            }
1518        }
1519
1520        let mut reg = Self {
1521            syntax_set: Arc::new(builder.build()),
1522            user_extensions,
1523            filename_scopes: base.filename_scopes.clone(),
1524            loaded_grammar_paths,
1525            grammar_sources,
1526            aliases: base.aliases.clone(),
1527            catalog: Vec::new(),
1528            catalog_by_name: HashMap::new(),
1529            catalog_by_extension: HashMap::new(),
1530            catalog_by_filename: HashMap::new(),
1531            applied_language_config: HashMap::new(),
1532            catalog_gen: 0,
1533        };
1534        reg.rebuild_catalog();
1535        Some(reg)
1536    }
1537
1538    /// Load a grammar file from disk
1539    ///
1540    /// Only Sublime Text (.sublime-syntax) format is supported.
1541    /// TextMate (.tmLanguage) grammars use a completely different format
1542    /// and cannot be loaded by syntect's yaml-load feature.
1543    pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1544        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1545
1546        match ext {
1547            "sublime-syntax" => {
1548                let content = std::fs::read_to_string(path)
1549                    .map_err(|e| format!("Failed to read file: {}", e))?;
1550                SyntaxDefinition::load_from_str(
1551                    &content,
1552                    true,
1553                    path.file_stem().and_then(|s| s.to_str()),
1554                )
1555                .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1556            }
1557            _ => Err(format!(
1558                "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1559                ext
1560            )),
1561        }
1562    }
1563}
1564
1565impl Default for GrammarRegistry {
1566    fn default() -> Self {
1567        // Create with defaults and embedded grammars only (no user grammars)
1568        let defaults = SyntaxSet::load_defaults_newlines();
1569        let mut builder = defaults.into_builder();
1570        Self::add_embedded_grammars(&mut builder);
1571        let syntax_set = builder.build();
1572        let filename_scopes = Self::build_filename_scopes();
1573        let extra_extensions = Self::build_extra_extensions();
1574
1575        let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1576        registry.populate_built_in_aliases();
1577        registry.rebuild_catalog();
1578        registry
1579    }
1580}
1581
1582// VSCode package.json structures for parsing grammar manifests
1583
1584#[derive(Debug, Deserialize)]
1585pub struct PackageManifest {
1586    #[serde(default)]
1587    pub contributes: Option<Contributes>,
1588}
1589
1590#[derive(Debug, Deserialize, Default)]
1591pub struct Contributes {
1592    #[serde(default)]
1593    pub languages: Vec<LanguageContribution>,
1594    #[serde(default)]
1595    pub grammars: Vec<GrammarContribution>,
1596}
1597
1598#[derive(Debug, Deserialize)]
1599pub struct LanguageContribution {
1600    pub id: String,
1601    #[serde(default)]
1602    pub extensions: Vec<String>,
1603}
1604
1605#[derive(Debug, Deserialize)]
1606pub struct GrammarContribution {
1607    pub language: String,
1608    #[serde(rename = "scopeName")]
1609    pub scope_name: String,
1610    pub path: String,
1611}
1612
1613#[cfg(test)]
1614mod tests {
1615    use super::*;
1616
1617    #[test]
1618    fn test_empty_registry() {
1619        let registry = GrammarRegistry::empty();
1620        // Should have at least plain text
1621        assert!(!registry.available_syntaxes().is_empty());
1622    }
1623
1624    #[test]
1625    fn test_default_registry() {
1626        let registry = GrammarRegistry::default();
1627        // Should have built-in syntaxes
1628        assert!(!registry.available_syntaxes().is_empty());
1629    }
1630
1631    #[test]
1632    fn test_find_syntax_for_common_extensions() {
1633        let registry = GrammarRegistry::default();
1634
1635        // Test common extensions that resolve to a syntect (TextMate) grammar
1636        // via the catalog. JavaScript is intentionally NOT here — it is routed
1637        // exclusively to tree-sitter (issue #899) and so has no catalog-level
1638        // syntect entry. Code-block highlighting in popups still finds the
1639        // syntect JS grammar through `SyntaxSet::find_syntax_by_token`, which
1640        // bypasses the catalog.
1641        let test_cases = [
1642            ("test.py", true),
1643            ("test.rs", true),
1644            ("test.js", false),
1645            ("test.json", true),
1646            ("test.md", true),
1647            ("test.html", true),
1648            ("test.css", true),
1649            ("test.gd", true),
1650            ("test.unknown_extension_xyz", false),
1651        ];
1652
1653        for (filename, should_exist) in test_cases {
1654            let path = Path::new(filename);
1655            let result = registry.find_syntax_for_file(path);
1656            assert_eq!(
1657                result.is_some(),
1658                should_exist,
1659                "Expected {:?} for {}",
1660                should_exist,
1661                filename
1662            );
1663        }
1664    }
1665
1666    #[test]
1667    fn test_racket_grammar_loaded() {
1668        let registry = GrammarRegistry::default();
1669        for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1670            let result = registry.find_syntax_for_file(Path::new(filename));
1671            assert!(
1672                result.is_some(),
1673                "Racket grammar should be available for {}",
1674                filename
1675            );
1676            let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1677            assert_eq!(entry.display_name, "Racket", "for {}", filename);
1678        }
1679    }
1680
1681    #[test]
1682    fn test_syntax_set_arc() {
1683        let registry = GrammarRegistry::default();
1684        let arc1 = registry.syntax_set_arc();
1685        let arc2 = registry.syntax_set_arc();
1686        // Both should point to the same data
1687        assert!(Arc::ptr_eq(&arc1, &arc2));
1688    }
1689
1690    #[test]
1691    fn test_shell_dotfiles_detection() {
1692        let registry = GrammarRegistry::default();
1693
1694        // All these should be detected as shell scripts
1695        let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1696
1697        for filename in shell_files {
1698            let path = Path::new(filename);
1699            let result = registry.find_syntax_for_file(path);
1700            assert!(
1701                result.is_some(),
1702                "{} should be detected as a syntax",
1703                filename
1704            );
1705            let syntax = result.unwrap();
1706            // Should be detected as Bash/Shell
1707            assert!(
1708                syntax.name.to_lowercase().contains("bash")
1709                    || syntax.name.to_lowercase().contains("shell"),
1710                "{} should be detected as shell/bash, got: {}",
1711                filename,
1712                syntax.name
1713            );
1714        }
1715    }
1716
1717    #[test]
1718    fn test_pkgbuild_detection() {
1719        let registry = GrammarRegistry::default();
1720
1721        // PKGBUILD and APKBUILD should be detected as shell scripts
1722        for filename in ["PKGBUILD", "APKBUILD"] {
1723            let path = Path::new(filename);
1724            let result = registry.find_syntax_for_file(path);
1725            assert!(
1726                result.is_some(),
1727                "{} should be detected as a syntax",
1728                filename
1729            );
1730            let syntax = result.unwrap();
1731            // Should be detected as Bash/Shell
1732            assert!(
1733                syntax.name.to_lowercase().contains("bash")
1734                    || syntax.name.to_lowercase().contains("shell"),
1735                "{} should be detected as shell/bash, got: {}",
1736                filename,
1737                syntax.name
1738            );
1739        }
1740    }
1741
1742    #[test]
1743    fn test_find_syntax_with_glob_filenames() {
1744        let mut registry = GrammarRegistry::default();
1745        let mut languages = std::collections::HashMap::new();
1746        languages.insert(
1747            "shell-configs".to_string(),
1748            crate::config::LanguageConfig {
1749                extensions: vec!["sh".to_string()],
1750                filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1751                grammar: "bash".to_string(),
1752                comment_prefix: Some("#".to_string()),
1753                auto_indent: true,
1754                auto_close: None,
1755                auto_surround: None,
1756                textmate_grammar: None,
1757                show_whitespace_tabs: true,
1758                line_wrap: None,
1759                wrap_column: None,
1760                page_view: None,
1761                page_width: None,
1762                use_tabs: None,
1763                tab_size: None,
1764                formatter: None,
1765                format_on_save: false,
1766                on_save: vec![],
1767                word_characters: None,
1768                indent: None,
1769            },
1770        );
1771        registry.apply_language_config(&languages);
1772
1773        assert!(
1774            registry
1775                .find_by_path(Path::new("nftables.conf"), None)
1776                .is_some(),
1777            "*.conf should match nftables.conf"
1778        );
1779        assert!(
1780            registry.find_by_path(Path::new("lfrc"), None).is_some(),
1781            "*rc should match lfrc"
1782        );
1783        // Unrelated file shouldn't panic.
1784        let _ = registry.find_by_path(Path::new("randomfile"), None);
1785    }
1786
1787    #[test]
1788    fn test_find_syntax_with_path_glob_filenames() {
1789        let mut registry = GrammarRegistry::default();
1790        let mut languages = std::collections::HashMap::new();
1791        languages.insert(
1792            "shell-configs".to_string(),
1793            crate::config::LanguageConfig {
1794                extensions: vec!["sh".to_string()],
1795                filenames: vec!["/etc/**/rc.*".to_string()],
1796                grammar: "bash".to_string(),
1797                comment_prefix: Some("#".to_string()),
1798                auto_indent: true,
1799                auto_close: None,
1800                auto_surround: None,
1801                textmate_grammar: None,
1802                show_whitespace_tabs: true,
1803                line_wrap: None,
1804                wrap_column: None,
1805                page_view: None,
1806                page_width: None,
1807                use_tabs: None,
1808                tab_size: None,
1809                formatter: None,
1810                format_on_save: false,
1811                on_save: vec![],
1812                word_characters: None,
1813                indent: None,
1814            },
1815        );
1816        registry.apply_language_config(&languages);
1817
1818        assert!(
1819            registry
1820                .find_by_path(Path::new("/etc/rc.conf"), None)
1821                .is_some(),
1822            "/etc/**/rc.* should match /etc/rc.conf"
1823        );
1824        assert!(
1825            registry
1826                .find_by_path(Path::new("/etc/init/rc.local"), None)
1827                .is_some(),
1828            "/etc/**/rc.* should match /etc/init/rc.local"
1829        );
1830        let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1831    }
1832
1833    #[test]
1834    fn test_exact_filename_takes_priority_over_glob() {
1835        let mut registry = GrammarRegistry::default();
1836        let mut languages = std::collections::HashMap::new();
1837
1838        // A language with exact filename "lfrc" -> python grammar
1839        languages.insert(
1840            "custom-lfrc".to_string(),
1841            crate::config::LanguageConfig {
1842                extensions: vec![],
1843                filenames: vec!["lfrc".to_string()],
1844                grammar: "python".to_string(),
1845                comment_prefix: Some("#".to_string()),
1846                auto_indent: true,
1847                auto_close: None,
1848                auto_surround: None,
1849                textmate_grammar: None,
1850                show_whitespace_tabs: true,
1851                line_wrap: None,
1852                wrap_column: None,
1853                page_view: None,
1854                page_width: None,
1855                use_tabs: None,
1856                tab_size: None,
1857                formatter: None,
1858                format_on_save: false,
1859                on_save: vec![],
1860                word_characters: None,
1861                indent: None,
1862            },
1863        );
1864
1865        // A language with glob "*rc" -> bash grammar
1866        languages.insert(
1867            "rc-files".to_string(),
1868            crate::config::LanguageConfig {
1869                extensions: vec![],
1870                filenames: vec!["*rc".to_string()],
1871                grammar: "bash".to_string(),
1872                comment_prefix: Some("#".to_string()),
1873                auto_indent: true,
1874                auto_close: None,
1875                auto_surround: None,
1876                textmate_grammar: None,
1877                show_whitespace_tabs: true,
1878                line_wrap: None,
1879                wrap_column: None,
1880                page_view: None,
1881                page_width: None,
1882                use_tabs: None,
1883                tab_size: None,
1884                formatter: None,
1885                format_on_save: false,
1886                on_save: vec![],
1887                word_characters: None,
1888                indent: None,
1889            },
1890        );
1891
1892        registry.apply_language_config(&languages);
1893
1894        // "lfrc" should match the exact rule (python), not the glob (bash)
1895        let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1896        assert!(
1897            entry.display_name.to_lowercase().contains("python"),
1898            "exact match should win over glob, got: {}",
1899            entry.display_name
1900        );
1901    }
1902
1903    #[test]
1904    fn test_built_in_aliases_resolve() {
1905        let registry = GrammarRegistry::default();
1906
1907        // "bash" should resolve to "Bourne Again Shell (bash)" via alias
1908        let syntax = registry.find_syntax_by_name("bash");
1909        assert!(syntax.is_some(), "alias 'bash' should resolve");
1910        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1911
1912        // "cpp" should resolve to "C++"
1913        let syntax = registry.find_syntax_by_name("cpp");
1914        assert!(syntax.is_some(), "alias 'cpp' should resolve");
1915        assert_eq!(syntax.unwrap().name, "C++");
1916
1917        // "csharp" should resolve to "C#"
1918        let syntax = registry.find_syntax_by_name("csharp");
1919        assert!(syntax.is_some(), "alias 'csharp' should resolve");
1920        assert_eq!(syntax.unwrap().name, "C#");
1921
1922        // "sh" should also resolve to bash
1923        let syntax = registry.find_syntax_by_name("sh");
1924        assert!(syntax.is_some(), "alias 'sh' should resolve");
1925        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1926
1927        // "proto" should resolve to "Protocol Buffers"
1928        let syntax = registry.find_syntax_by_name("proto");
1929        assert!(syntax.is_some(), "alias 'proto' should resolve");
1930        assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1931    }
1932
1933    #[test]
1934    fn test_alias_case_insensitive_input() {
1935        let registry = GrammarRegistry::default();
1936
1937        // Aliases should be case-insensitive on input
1938        let syntax = registry.find_syntax_by_name("BASH");
1939        assert!(
1940            syntax.is_some(),
1941            "alias 'BASH' should resolve case-insensitively"
1942        );
1943        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1944
1945        let syntax = registry.find_syntax_by_name("Cpp");
1946        assert!(
1947            syntax.is_some(),
1948            "alias 'Cpp' should resolve case-insensitively"
1949        );
1950        assert_eq!(syntax.unwrap().name, "C++");
1951    }
1952
1953    #[test]
1954    fn test_full_name_still_works() {
1955        let registry = GrammarRegistry::default();
1956
1957        // Full names should still work (exact match)
1958        let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1959        assert!(syntax.is_some(), "full name should still resolve");
1960        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1961
1962        // Case-insensitive full name should still work
1963        let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1964        assert!(
1965            syntax.is_some(),
1966            "case-insensitive full name should resolve"
1967        );
1968        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1969    }
1970
1971    #[test]
1972    fn test_alias_does_not_shadow_full_names() {
1973        let registry = GrammarRegistry::default();
1974
1975        // "Rust" should resolve directly via case-insensitive match, not via alias
1976        let syntax = registry.find_syntax_by_name("rust");
1977        assert!(syntax.is_some());
1978        assert_eq!(syntax.unwrap().name, "Rust");
1979
1980        // "Go" should resolve directly
1981        let syntax = registry.find_syntax_by_name("go");
1982        assert!(syntax.is_some());
1983        assert_eq!(syntax.unwrap().name, "Go");
1984    }
1985
1986    #[test]
1987    fn test_register_alias_rejects_collision() {
1988        let mut registry = GrammarRegistry::default();
1989
1990        // Trying to register an alias that maps to two different targets should fail
1991        assert!(registry.register_alias("myalias", "Rust"));
1992        assert!(!registry.register_alias("myalias", "Go"));
1993
1994        // Same mapping is fine (idempotent)
1995        assert!(registry.register_alias("myalias", "Rust"));
1996    }
1997
1998    #[test]
1999    fn test_register_alias_rejects_nonexistent_target() {
2000        let mut registry = GrammarRegistry::default();
2001        assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
2002    }
2003
2004    #[test]
2005    fn test_register_alias_skips_existing_grammar_name() {
2006        let mut registry = GrammarRegistry::default();
2007
2008        // "rust" case-insensitively matches the grammar "Rust", so no alias needed
2009        assert!(!registry.register_alias("rust", "Rust"));
2010        // Should still be resolvable via case-insensitive match
2011        assert!(registry.find_syntax_by_name("rust").is_some());
2012    }
2013
2014    #[test]
2015    fn test_available_grammar_info_includes_short_names() {
2016        let registry = GrammarRegistry::default();
2017        let infos = registry.available_grammar_info();
2018
2019        let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
2020        assert!(bash_info.is_some(), "bash grammar should be in the list");
2021        let bash_info = bash_info.unwrap();
2022        assert!(
2023            bash_info.short_name.is_some(),
2024            "bash grammar should have a short_name"
2025        );
2026        // The shortest alias for bash is "sh"
2027        assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
2028    }
2029
2030    #[test]
2031    fn test_catalog_contains_each_language_once() {
2032        let registry = GrammarRegistry::default();
2033        let catalog = registry.catalog();
2034
2035        // Every catalog entry must have a unique (case-insensitive) display name.
2036        let mut seen = std::collections::HashSet::new();
2037        for entry in catalog {
2038            let key = entry.display_name.to_lowercase();
2039            assert!(
2040                seen.insert(key.clone()),
2041                "duplicate catalog entry for display_name={:?}",
2042                entry.display_name
2043            );
2044        }
2045
2046        // TypeScript is tree-sitter-only (syntect ships no grammar for it) yet
2047        // must still appear in the catalog.
2048        let ts = registry
2049            .find_by_name("TypeScript")
2050            .expect("TypeScript must be in the catalog");
2051        assert!(ts.engines.syntect.is_none());
2052        assert_eq!(
2053            ts.engines.tree_sitter,
2054            Some(fresh_languages::Language::TypeScript)
2055        );
2056        assert_eq!(ts.language_id, "typescript");
2057        assert!(ts.extensions.iter().any(|e| e == "ts"));
2058
2059        // Languages that exist in both syntect and tree-sitter (Rust, Python)
2060        // must appear exactly once and prefer the syntect engine.
2061        for name in ["Rust", "Python"] {
2062            let entry = registry
2063                .find_by_name(name)
2064                .unwrap_or_else(|| panic!("{} must be in the catalog", name));
2065            assert!(
2066                entry.engines.syntect.is_some(),
2067                "{} should have a syntect index",
2068                name
2069            );
2070            assert!(
2071                entry.engines.tree_sitter.is_some(),
2072                "{} should also have a tree-sitter language",
2073                name
2074            );
2075            // Only one entry with this display name (already checked above),
2076            // but also verify language_id lookup lands on the same entry.
2077            let by_id = registry
2078                .find_by_name(&entry.language_id)
2079                .expect("language_id should resolve");
2080            assert_eq!(by_id.display_name, entry.display_name);
2081        }
2082
2083        // JavaScript is deliberately routed to tree-sitter only — the
2084        // bundled syntect JavaScript grammar mishandles certain template
2085        // literals and bleeds string state into the rest of the file
2086        // (issue #899). The catalog must therefore expose a tree-sitter-only
2087        // entry, even though syntect ships a JavaScript grammar.
2088        let js = registry
2089            .find_by_name("JavaScript")
2090            .expect("JavaScript must be in the catalog");
2091        assert!(
2092            js.engines.syntect.is_none(),
2093            "JavaScript must not be routed to the syntect engine (issue #899)"
2094        );
2095        assert_eq!(
2096            js.engines.tree_sitter,
2097            Some(fresh_languages::Language::JavaScript),
2098            "JavaScript must carry the tree-sitter language"
2099        );
2100
2101        let gdscript = registry
2102            .find_by_path(Path::new("player.gd"), None)
2103            .expect("player.gd should resolve to GDScript");
2104        assert_eq!(gdscript.display_name, "GDScript");
2105        assert_eq!(gdscript.language_id, "gdscript");
2106        assert!(
2107            gdscript.engines.syntect.is_some(),
2108            "GDScript should use the embedded Syntect grammar"
2109        );
2110        assert!(
2111            gdscript.engines.tree_sitter.is_none(),
2112            "GDScript must not carry a tree-sitter parser"
2113        );
2114    }
2115
2116    #[test]
2117    fn test_catalog_find_by_path_and_extension() {
2118        let registry = GrammarRegistry::default();
2119        let ts = registry
2120            .find_by_path(Path::new("foo.ts"), None)
2121            .expect("foo.ts should resolve");
2122        assert_eq!(ts.display_name, "TypeScript");
2123        let rs = registry.find_by_extension("rs").expect("rs should resolve");
2124        assert_eq!(rs.display_name, "Rust");
2125    }
2126
2127    #[test]
2128    fn test_smali_embedded_grammar_loads_and_resolves() {
2129        let syntax = SyntaxDefinition::load_from_str(SMALI_GRAMMAR, true, Some("Smali"))
2130            .expect("Smali grammar should parse");
2131        assert!(syntax.file_extensions.iter().any(|ext| ext == "smali"));
2132
2133        let registry = GrammarRegistry::default();
2134        let entry = registry
2135            .find_by_path(Path::new("MainActivity.smali"), None)
2136            .expect("Smali files should resolve");
2137        assert_eq!(entry.display_name, "Smali");
2138        assert!(entry.engines.syntect.is_some());
2139        assert!(entry.engines.tree_sitter.is_none());
2140    }
2141
2142    /// Build a minimal LanguageConfig for tests.
2143    fn lang_cfg(
2144        grammar: &str,
2145        extensions: &[&str],
2146        filenames: &[&str],
2147    ) -> crate::config::LanguageConfig {
2148        crate::config::LanguageConfig {
2149            extensions: extensions.iter().map(|s| s.to_string()).collect(),
2150            filenames: filenames.iter().map(|s| s.to_string()).collect(),
2151            grammar: grammar.to_string(),
2152            comment_prefix: None,
2153            auto_indent: true,
2154            auto_close: None,
2155            auto_surround: None,
2156            textmate_grammar: None,
2157            show_whitespace_tabs: true,
2158            line_wrap: None,
2159            wrap_column: None,
2160            page_view: None,
2161            page_width: None,
2162            use_tabs: None,
2163            tab_size: None,
2164            formatter: None,
2165            format_on_save: false,
2166            on_save: vec![],
2167            word_characters: None,
2168            indent: None,
2169        }
2170    }
2171
2172    /// Bug #1: a user-declared config key that aliases an existing grammar
2173    /// (e.g. `[languages.mylang] grammar = "Rust"`) must resolve via
2174    /// `find_by_name("mylang")` so the language palette can select it.
2175    #[test]
2176    fn test_user_alias_resolves_via_find_by_name() {
2177        let mut registry = GrammarRegistry::default();
2178        let mut languages = std::collections::HashMap::new();
2179        languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2180        registry.apply_language_config(&languages);
2181
2182        let entry = registry
2183            .find_by_name("mylang")
2184            .expect("user-declared alias 'mylang' must resolve");
2185        assert_eq!(entry.display_name, "Rust");
2186    }
2187
2188    /// Bug #2: `register_alias` used to rebuild the catalog from scratch,
2189    /// wiping out everything `apply_language_config` had merged. Registering
2190    /// an alias afterwards must not lose user config.
2191    #[test]
2192    fn test_register_alias_preserves_applied_language_config() {
2193        let mut registry = GrammarRegistry::default();
2194        let mut languages = std::collections::HashMap::new();
2195        languages.insert(
2196            "shell-configs".to_string(),
2197            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2198        );
2199        registry.apply_language_config(&languages);
2200
2201        // Sanity: config applied.
2202        assert!(registry.find_by_extension("myconf").is_some());
2203        assert!(
2204            registry
2205                .find_by_path(Path::new("foo.myconf"), None)
2206                .is_some(),
2207            "glob should match before register_alias"
2208        );
2209
2210        // Registering an alias must not erase the config we just applied.
2211        registry.register_alias("mycustom", "Rust");
2212
2213        assert!(
2214            registry.find_by_extension("myconf").is_some(),
2215            "config extension must survive register_alias"
2216        );
2217        assert!(
2218            registry
2219                .find_by_path(Path::new("foo.myconf"), None)
2220                .is_some(),
2221            "glob must survive register_alias"
2222        );
2223    }
2224
2225    /// Bug #4: `from_syntax_name` used to unconditionally overwrite the
2226    /// catalog's canonical display name with whatever the user typed (e.g.
2227    /// "BASH") — that string ended up in the status bar.
2228    #[test]
2229    fn test_from_syntax_name_preserves_canonical_display_name() {
2230        use crate::primitives::detected_language::DetectedLanguage;
2231        let registry = GrammarRegistry::default();
2232        let languages = std::collections::HashMap::new();
2233
2234        let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
2235            .expect("BASH should resolve via alias");
2236        assert_eq!(
2237            detected.display_name, "Bourne Again Shell (bash)",
2238            "display_name must be canonical, not user-typed"
2239        );
2240    }
2241
2242    /// A config-only language (no matching syntect grammar) must still appear
2243    /// in the catalog so the language palette can offer it — the old
2244    /// `DetectedLanguage::from_config_language` branch was load-bearing.
2245    #[test]
2246    fn test_config_only_language_appears_in_catalog() {
2247        let mut registry = GrammarRegistry::default();
2248        let mut languages = std::collections::HashMap::new();
2249        languages.insert("elvish".to_string(), lang_cfg("elvish", &["elv"], &[]));
2250        registry.apply_language_config(&languages);
2251
2252        let entry = registry
2253            .find_by_name("elvish")
2254            .expect("elvish should be in the catalog after apply_language_config");
2255        assert!(entry.engines.syntect.is_none());
2256        assert!(entry.engines.tree_sitter.is_none());
2257        assert_eq!(entry.language_id, "elvish");
2258        assert!(entry.extensions.iter().any(|e| e == "elv"));
2259    }
2260
2261    #[test]
2262    fn test_fish_extension_resolves_to_fish_grammar_not_bash() {
2263        // Syntect's stock Bash grammar also advertises `.fish`; the catalog
2264        // strips it so only Fresh's dedicated Fish grammar owns the extension.
2265        let registry = GrammarRegistry::default();
2266        let entry = registry
2267            .find_by_extension("fish")
2268            .expect(".fish should resolve to a grammar entry");
2269
2270        assert_eq!(entry.language_id, "fish");
2271        assert_eq!(entry.display_name, "Fish");
2272        assert!(entry.engines.syntect.is_some());
2273    }
2274
2275    /// Config-declared extensions must override the built-in mapping. If the
2276    /// user says `[languages.typescript-overlay] extensions = ["js"] grammar
2277    /// = "TypeScript"`, then `foo.js` must resolve to TypeScript, not
2278    /// JavaScript.
2279    #[test]
2280    fn test_config_extension_overrides_builtin() {
2281        let mut registry = GrammarRegistry::default();
2282        // Sanity: default mapping is JavaScript.
2283        assert_eq!(
2284            registry.find_by_extension("js").unwrap().display_name,
2285            "JavaScript"
2286        );
2287
2288        let mut languages = std::collections::HashMap::new();
2289        languages.insert(
2290            "ts-overlay".to_string(),
2291            lang_cfg("TypeScript", &["js"], &[]),
2292        );
2293        registry.apply_language_config(&languages);
2294
2295        assert_eq!(
2296            registry.find_by_extension("js").unwrap().display_name,
2297            "TypeScript",
2298            "user-config extension must win over built-in"
2299        );
2300    }
2301
2302    /// Bare filenames listed by syntect grammars (e.g. "Gemfile", "Makefile",
2303    /// "Rakefile") must resolve through `find_by_path`. Syntect stores these
2304    /// in each grammar's `file_extensions` field alongside real extensions
2305    /// like "rb"; its own `find_syntax_for_file` treats them as either. The
2306    /// catalog has to do the same or `HighlightEngine::for_file` breaks for
2307    /// every extensionless config file.
2308    #[test]
2309    fn test_bare_filename_resolves_via_find_by_path() {
2310        let registry = GrammarRegistry::default();
2311        for (filename, expected_substr) in [
2312            ("Gemfile", "ruby"),
2313            ("Rakefile", "ruby"),
2314            ("Vagrantfile", "ruby"),
2315            ("Makefile", "makefile"),
2316            ("GNUmakefile", "makefile"),
2317        ] {
2318            let entry = registry
2319                .find_by_path(Path::new(filename), None)
2320                .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2321            assert!(
2322                entry.display_name.to_lowercase().contains(expected_substr),
2323                "{} should resolve to {} grammar, got {}",
2324                filename,
2325                expected_substr,
2326                entry.display_name
2327            );
2328        }
2329    }
2330
2331    /// Languages that have both syntect and tree-sitter (e.g. JavaScript) must
2332    /// expose the union of both engines' extensions. Tree-sitter-javascript
2333    /// knows `.jsx`; syntect's JavaScript grammar does not. Both should route
2334    /// through the JavaScript catalog entry.
2335    #[test]
2336    fn test_jsx_resolves_to_javascript() {
2337        let registry = GrammarRegistry::default();
2338        let entry = registry
2339            .find_by_path(Path::new("foo.jsx"), None)
2340            .expect("foo.jsx must resolve");
2341        assert_eq!(entry.display_name, "JavaScript");
2342    }
2343
2344    /// `rebuild_catalog` must replay the last-applied language config so it
2345    /// can never silently wipe user `[languages]` rules. This is the invariant
2346    /// that keeps `register_alias`, `populate_built_in_aliases`, and any
2347    /// future rebuild callsite safe-by-construction.
2348    #[test]
2349    fn test_rebuild_catalog_replays_language_config() {
2350        let mut registry = GrammarRegistry::default();
2351        let mut languages = std::collections::HashMap::new();
2352        languages.insert(
2353            "myshell".to_string(),
2354            lang_cfg("bash", &["myext"], &["*.myglob"]),
2355        );
2356        registry.apply_language_config(&languages);
2357        assert!(registry.find_by_extension("myext").is_some());
2358        assert!(registry
2359            .find_by_path(Path::new("foo.myglob"), None)
2360            .is_some());
2361
2362        // Force a rebuild — the catalog gets wiped and re-populated from
2363        // syntect / tree-sitter, but user config must come back on top.
2364        registry.rebuild_catalog();
2365        assert!(
2366            registry.find_by_extension("myext").is_some(),
2367            "rebuild_catalog must replay applied user config"
2368        );
2369        assert!(
2370            registry
2371                .find_by_path(Path::new("foo.myglob"), None)
2372                .is_some(),
2373            "rebuild_catalog must replay user globs"
2374        );
2375    }
2376
2377    /// `apply_language_config` must be idempotent: calling it twice with the
2378    /// same config yields the same catalog state.
2379    #[test]
2380    fn test_apply_language_config_idempotent() {
2381        let mut registry = GrammarRegistry::default();
2382        let mut languages = std::collections::HashMap::new();
2383        languages.insert(
2384            "shell-cfg".to_string(),
2385            lang_cfg("bash", &["myconf"], &["*.myconf"]),
2386        );
2387
2388        registry.apply_language_config(&languages);
2389        let first_extensions = registry
2390            .find_by_name("bash")
2391            .unwrap()
2392            .extensions
2393            .iter()
2394            .filter(|e| e == &"myconf")
2395            .count();
2396        let first_globs = registry
2397            .find_by_name("bash")
2398            .unwrap()
2399            .filename_globs
2400            .iter()
2401            .filter(|g| g == &"*.myconf")
2402            .count();
2403        assert_eq!(first_extensions, 1);
2404        assert_eq!(first_globs, 1);
2405
2406        // Second call must not duplicate anything.
2407        registry.apply_language_config(&languages);
2408        let second_extensions = registry
2409            .find_by_name("bash")
2410            .unwrap()
2411            .extensions
2412            .iter()
2413            .filter(|e| e == &"myconf")
2414            .count();
2415        let second_globs = registry
2416            .find_by_name("bash")
2417            .unwrap()
2418            .filename_globs
2419            .iter()
2420            .filter(|g| g == &"*.myconf")
2421            .count();
2422        assert_eq!(second_extensions, 1, "extensions must not duplicate");
2423        assert_eq!(second_globs, 1, "globs must not duplicate");
2424    }
2425
2426    /// Julia: a single-quote after an identifier is the adjoint
2427    /// (conjugate-transpose) postfix operator, not the start of a string. The
2428    /// old grammar pushed a string context on every `'`, so `A'` swallowed
2429    /// the rest of the file until the next quote — wrecking highlighting for
2430    /// any subsequent keyword. Issue #1852.
2431    #[test]
2432    fn test_julia_adjoint_does_not_start_string() {
2433        use syntect::parsing::{ParseState, ScopeStack};
2434
2435        let registry = GrammarRegistry::default();
2436        let syntax_set = registry.syntax_set();
2437        let syntax = registry
2438            .find_syntax_by_name("Julia")
2439            .expect("Julia grammar must be loaded");
2440        let mut state = ParseState::new(syntax);
2441        let mut scopes = ScopeStack::new();
2442
2443        // Adjoint operator followed by code on later lines.
2444        let lines = ["x = A'\n", "function foo()\n", "end\n"];
2445        let mut keyword_line_in_string = false;
2446        let mut found_function_keyword = false;
2447
2448        for line in &lines {
2449            let ops = state.parse_line(line, syntax_set).unwrap();
2450            // Walk byte-by-byte, applying ops as we pass their offset.
2451            let mut op_iter = ops.iter().peekable();
2452            for (byte_idx, _) in line.char_indices() {
2453                while let Some((offset, op)) = op_iter.peek() {
2454                    if *offset <= byte_idx {
2455                        scopes.apply(op).unwrap();
2456                        op_iter.next();
2457                    } else {
2458                        break;
2459                    }
2460                }
2461                let in_string = scopes
2462                    .as_slice()
2463                    .iter()
2464                    .any(|s| s.build_string().starts_with("string."));
2465                let is_function_kw = line[byte_idx..].starts_with("function");
2466                if is_function_kw && in_string {
2467                    keyword_line_in_string = true;
2468                }
2469                if is_function_kw && !in_string {
2470                    found_function_keyword = true;
2471                }
2472            }
2473            // Drain remaining ops at end of line.
2474            for (_, op) in op_iter {
2475                scopes.apply(op).unwrap();
2476            }
2477        }
2478
2479        assert!(
2480            !keyword_line_in_string,
2481            "the `function` keyword after an adjoint operator must not be inside a string scope"
2482        );
2483        assert!(
2484            found_function_keyword,
2485            "test harness must have reached the `function` keyword"
2486        );
2487    }
2488
2489    /// Julia: `'a'` is a valid character literal. The grammar must still
2490    /// scope it as a constant/character so themes can color it. Issue #1852.
2491    #[test]
2492    fn test_julia_char_literal_is_recognized() {
2493        use syntect::parsing::{ParseState, ScopeStack};
2494
2495        let registry = GrammarRegistry::default();
2496        let syntax_set = registry.syntax_set();
2497        let syntax = registry
2498            .find_syntax_by_name("Julia")
2499            .expect("Julia grammar must be loaded");
2500        let mut state = ParseState::new(syntax);
2501        let mut scopes = ScopeStack::new();
2502
2503        let line = "x = 'a'\n";
2504        let ops = state.parse_line(line, syntax_set).unwrap();
2505        let mut saw_constant_or_string_at_quote = false;
2506        let mut op_iter = ops.iter().peekable();
2507        for (byte_idx, _) in line.char_indices() {
2508            while let Some((offset, op)) = op_iter.peek() {
2509                if *offset <= byte_idx {
2510                    scopes.apply(op).unwrap();
2511                    op_iter.next();
2512                } else {
2513                    break;
2514                }
2515            }
2516            if byte_idx == 5 {
2517                // position of 'a' (the char)
2518                let scoped = scopes.as_slice().iter().any(|s| {
2519                    let str = s.build_string();
2520                    str.starts_with("constant.") || str.starts_with("string.")
2521                });
2522                if scoped {
2523                    saw_constant_or_string_at_quote = true;
2524                }
2525            }
2526        }
2527        assert!(
2528            saw_constant_or_string_at_quote,
2529            "char literal 'a' must receive a constant/string scope"
2530        );
2531    }
2532
2533    /// `tree_sitter_for_syntect_name` handles the alias table + strict
2534    /// display-name match. The alias table catches syntect's verbose names;
2535    /// the strict match handles the common case.
2536    #[test]
2537    fn test_tree_sitter_bridge() {
2538        assert_eq!(
2539            tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2540            Some(fresh_languages::Language::Bash)
2541        );
2542        assert_eq!(
2543            tree_sitter_for_syntect_name("Rust"),
2544            Some(fresh_languages::Language::Rust)
2545        );
2546        assert_eq!(tree_sitter_for_syntect_name("GDScript"), None);
2547        // Must NOT fuzzy-match Nushell to Bash.
2548        assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2549        // Must NOT match arbitrary strings.
2550        assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2551    }
2552}