1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file that should be loaded in addition to the
/// built-in syntaxes.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Name of the language (or plugin) this grammar was registered for.
    pub language: String,
    /// Location of the grammar file on disk.
    pub path: PathBuf,
    /// File extensions that should be mapped to the grammar's scope once it
    /// has been loaded.
    pub extensions: Vec<String>,
}
30
/// Where a grammar came from.
///
/// Serialized as an internally tagged enum: the variant is stored in a
/// `"type"` field using the kebab-case names given by the `rename`
/// attributes below.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Shipped with the application.
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Supplied by the user from `path`.
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Provided by the language pack `name`, loaded from `path`.
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Provided by the bundle `name`, loaded from `path`.
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Registered by the plugin `plugin`, loaded from `path`.
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of one grammar: its name, origin and extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name.
    pub name: String,
    /// Where the grammar was loaded from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias. Left out of the serialized form when `None`
    /// and defaulted to `None` when missing on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit pairs of (syntect display name, tree-sitter language).
///
/// `tree_sitter_for_syntect_name` consults this table before falling back to
/// comparing against each language's own `display_name()`.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// The highlighting back ends available for one catalog entry.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index into the registry's `SyntaxSet::syntaxes()` slice, when a
    /// syntect grammar exists for the entry.
    pub syntect: Option<usize>,
    /// Matching tree-sitter language, when one exists for the entry.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One row of the unified grammar catalog built by
/// `GrammarRegistry::rebuild_catalog`.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Display name (the syntect syntax name, or the tree-sitter language's
    /// display name for tree-sitter-only entries).
    pub display_name: String,
    /// Language identifier: the tree-sitter language id when one matches,
    /// otherwise the lowercased display name.
    pub language_id: String,
    /// Shortest known alias for this grammar, if any.
    pub short_name: Option<String>,
    /// File extensions that resolve to this entry.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (filename or path patterns) that resolve to this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Highlighting back ends available for this entry.
    pub engines: GrammarEngines,
}
142
// Embedded `.sublime-syntax` grammar sources. Each constant holds the text of
// the named file, included at compile time via `include_str!`.

/// Embedded TOML grammar source.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar source.
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar source.
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded git-rebase grammar source.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded git-commit grammar source.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded gitignore grammar source.
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded gitconfig grammar source.
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded gitattributes grammar source.
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar source.
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Embedded Dockerfile grammar source.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar source.
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar source.
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar source.
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar source.
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar source.
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar source.
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar source.
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar source.
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar source.
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar source.
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar source.
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL grammar source.
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar source.
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar source.
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar source.
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar source.
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar source.
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V grammar source.
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar source.
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar source.
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar source.
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Starlark grammar source.
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar source.
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar source.
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go module (`go.mod`) grammar source.
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar source.
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar source.
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar source.
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar source.
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar source.
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
235
236impl std::fmt::Debug for GrammarRegistry {
241 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
242 f.debug_struct("GrammarRegistry")
243 .field("syntax_count", &self.syntax_set.syntaxes().len())
244 .finish()
245 }
246}
247
/// Registry of syntax grammars plus the lookup catalog derived from them.
pub struct GrammarRegistry {
    /// The compiled syntect syntax set, shared via `Arc`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra file extension -> scope name mappings.
    user_extensions: HashMap<String, String>,
    /// Exact filename -> scope name mappings.
    filename_scopes: HashMap<String, String>,
    /// Specs of grammar files that were loaded in addition to the defaults.
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Provenance information keyed by syntax name.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lowercased alias -> exact syntax name.
    aliases: HashMap<String, String>,
    /// Unified list of grammars; rebuilt by `rebuild_catalog`.
    catalog: Vec<GrammarEntry>,
    /// Lowercased name (display name, language id, short name) -> index
    /// into `catalog`.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased extension -> index into `catalog`.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact filename -> index into `catalog`.
    catalog_by_filename: HashMap<String, usize>,
    /// Language config most recently passed to `apply_language_config`;
    /// re-applied whenever the catalog is rebuilt.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
}
280
281impl GrammarRegistry {
282 pub(crate) fn new(
287 syntax_set: SyntaxSet,
288 user_extensions: HashMap<String, String>,
289 filename_scopes: HashMap<String, String>,
290 ) -> Self {
291 Self::new_with_loaded_paths(
292 syntax_set,
293 user_extensions,
294 filename_scopes,
295 Vec::new(),
296 HashMap::new(),
297 )
298 }
299
300 pub(crate) fn new_with_loaded_paths(
305 syntax_set: SyntaxSet,
306 user_extensions: HashMap<String, String>,
307 filename_scopes: HashMap<String, String>,
308 loaded_grammar_paths: Vec<GrammarSpec>,
309 grammar_sources: HashMap<String, GrammarInfo>,
310 ) -> Self {
311 let mut reg = Self {
312 syntax_set: Arc::new(syntax_set),
313 user_extensions,
314 filename_scopes,
315 loaded_grammar_paths,
316 grammar_sources,
317 aliases: HashMap::new(),
318 catalog: Vec::new(),
319 catalog_by_name: HashMap::new(),
320 catalog_by_extension: HashMap::new(),
321 catalog_by_filename: HashMap::new(),
322 applied_language_config: HashMap::new(),
323 };
324 reg.rebuild_catalog();
325 reg
326 }
327
328 pub fn empty() -> Arc<Self> {
330 let mut builder = SyntaxSetBuilder::new();
331 builder.add_plain_text_syntax();
332 let mut reg = Self {
333 syntax_set: Arc::new(builder.build()),
334 user_extensions: HashMap::new(),
335 filename_scopes: HashMap::new(),
336 loaded_grammar_paths: Vec::new(),
337 grammar_sources: HashMap::new(),
338 aliases: HashMap::new(),
339 catalog: Vec::new(),
340 catalog_by_name: HashMap::new(),
341 catalog_by_extension: HashMap::new(),
342 catalog_by_filename: HashMap::new(),
343 applied_language_config: HashMap::new(),
344 };
345 reg.rebuild_catalog();
346 Arc::new(reg)
347 }
348
349 pub fn defaults_only() -> Arc<Self> {
356 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
360 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
361 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
362 ))
363 .expect("Failed to load pre-compiled syntax packdump");
364 tracing::info!(
365 "defaults_only: loaded ({} syntaxes)",
366 syntax_set.syntaxes().len()
367 );
368 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
369 let filename_scopes = Self::build_filename_scopes();
370 let extra_extensions = Self::build_extra_extensions();
371 let mut registry = Self {
372 syntax_set: Arc::new(syntax_set),
373 user_extensions: extra_extensions,
374 filename_scopes,
375 loaded_grammar_paths: Vec::new(),
376 grammar_sources,
377 aliases: HashMap::new(),
378 catalog: Vec::new(),
379 catalog_by_name: HashMap::new(),
380 catalog_by_extension: HashMap::new(),
381 catalog_by_filename: HashMap::new(),
382 applied_language_config: HashMap::new(),
383 };
384 registry.populate_built_in_aliases();
385 registry.rebuild_catalog();
386 Arc::new(registry)
387 }
388
/// Returns extra extension -> scope mappings layered on top of the grammars'
/// own extension lists: `cjs` and `mjs` both map to `source.js`.
pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
    const JS_SCOPE: &str = "source.js";
    const JS_EXTENSIONS: [&str; 2] = ["cjs", "mjs"];

    JS_EXTENSIONS
        .iter()
        .map(|ext| (ext.to_string(), JS_SCOPE.to_string()))
        .collect()
}
406
/// Returns the built-in exact-filename -> scope mappings for files that are
/// recognised by their whole name rather than by extension (shell rc files,
/// git metadata files, `Dockerfile`, `CMakeLists.txt`, Bazel files, ...).
pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
    // Each row is a scope followed by every filename that resolves to it.
    const SCOPED_FILENAMES: &[(&str, &[&str])] = &[
        (
            "source.shell.bash",
            &[
                ".zshrc",
                ".zprofile",
                ".zshenv",
                ".zlogin",
                ".zlogout",
                ".bash_aliases",
                "PKGBUILD",
                "APKBUILD",
            ],
        ),
        ("source.git-rebase-todo", &["git-rebase-todo"]),
        (
            "source.git-commit",
            &["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"],
        ),
        (
            "source.gitignore",
            &[".gitignore", ".dockerignore", ".npmignore", ".hgignore"],
        ),
        ("source.gitconfig", &[".gitconfig", ".gitmodules"]),
        ("source.gitattributes", &[".gitattributes"]),
        ("source.groovy", &["Jenkinsfile"]),
        ("source.ruby", &["Brewfile"]),
        (
            "source.dockerfile",
            &[
                "Dockerfile",
                "Containerfile",
                "Dockerfile.dev",
                "Dockerfile.prod",
                "Dockerfile.test",
                "Dockerfile.build",
            ],
        ),
        ("source.cmake", &["CMakeLists.txt"]),
        (
            "source.starlark",
            &[
                "BUILD",
                "BUILD.bazel",
                "WORKSPACE",
                "WORKSPACE.bazel",
                "Tiltfile",
            ],
        ),
        ("source.justfile", &["justfile", "Justfile", ".justfile"]),
        ("source.ini", &[".editorconfig"]),
        ("source.earthfile", &["Earthfile"]),
        (
            "source.hyprlang",
            &["hyprland.conf", "hyprpaper.conf", "hyprlock.conf"],
        ),
        ("source.gomod", &["go.mod", "go.sum"]),
    ];

    let mut map = HashMap::new();
    for &(scope, filenames) in SCOPED_FILENAMES {
        for &filename in filenames {
            map.insert(filename.to_string(), scope.to_string());
        }
    }
    map
}
512
513 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
515 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
517 Ok(syntax) => {
518 builder.add(syntax);
519 tracing::debug!("Loaded embedded TOML grammar");
520 }
521 Err(e) => {
522 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
523 }
524 }
525
526 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
528 Ok(syntax) => {
529 builder.add(syntax);
530 tracing::debug!("Loaded embedded Odin grammar");
531 }
532 Err(e) => {
533 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
534 }
535 }
536
537 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
539 Ok(syntax) => {
540 builder.add(syntax);
541 tracing::debug!("Loaded embedded Zig grammar");
542 }
543 Err(e) => {
544 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
545 }
546 }
547
548 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
550 Ok(syntax) => {
551 builder.add(syntax);
552 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
553 }
554 Err(e) => {
555 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
556 }
557 }
558
559 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
561 {
562 Ok(syntax) => {
563 builder.add(syntax);
564 tracing::debug!("Loaded embedded Git Commit Message grammar");
565 }
566 Err(e) => {
567 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
568 }
569 }
570
571 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
573 Ok(syntax) => {
574 builder.add(syntax);
575 tracing::debug!("Loaded embedded Gitignore grammar");
576 }
577 Err(e) => {
578 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
579 }
580 }
581
582 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
584 Ok(syntax) => {
585 builder.add(syntax);
586 tracing::debug!("Loaded embedded Git Config grammar");
587 }
588 Err(e) => {
589 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
590 }
591 }
592
593 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
595 Ok(syntax) => {
596 builder.add(syntax);
597 tracing::debug!("Loaded embedded Git Attributes grammar");
598 }
599 Err(e) => {
600 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
601 }
602 }
603
604 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
606 Ok(syntax) => {
607 builder.add(syntax);
608 tracing::debug!("Loaded embedded Typst grammar");
609 }
610 Err(e) => {
611 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
612 }
613 }
614
615 let additional_grammars: &[(&str, &str)] = &[
617 (DOCKERFILE_GRAMMAR, "Dockerfile"),
618 (INI_GRAMMAR, "INI"),
619 (CMAKE_GRAMMAR, "CMake"),
620 (SCSS_GRAMMAR, "SCSS"),
621 (LESS_GRAMMAR, "LESS"),
622 (POWERSHELL_GRAMMAR, "PowerShell"),
623 (KOTLIN_GRAMMAR, "Kotlin"),
624 (SWIFT_GRAMMAR, "Swift"),
625 (DART_GRAMMAR, "Dart"),
626 (ELIXIR_GRAMMAR, "Elixir"),
627 (FSHARP_GRAMMAR, "FSharp"),
628 (NIX_GRAMMAR, "Nix"),
629 (HCL_GRAMMAR, "HCL"),
630 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
631 (GRAPHQL_GRAMMAR, "GraphQL"),
632 (JULIA_GRAMMAR, "Julia"),
633 (NIM_GRAMMAR, "Nim"),
634 (GLEAM_GRAMMAR, "Gleam"),
635 (VLANG_GRAMMAR, "V"),
636 (SOLIDITY_GRAMMAR, "Solidity"),
637 (KDL_GRAMMAR, "KDL"),
638 (NUSHELL_GRAMMAR, "Nushell"),
639 (STARLARK_GRAMMAR, "Starlark"),
640 (JUSTFILE_GRAMMAR, "Justfile"),
641 (EARTHFILE_GRAMMAR, "Earthfile"),
642 (GOMOD_GRAMMAR, "Go Module"),
643 (VUE_GRAMMAR, "Vue"),
644 (SVELTE_GRAMMAR, "Svelte"),
645 (ASTRO_GRAMMAR, "Astro"),
646 (HYPRLANG_GRAMMAR, "Hyprlang"),
647 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
648 ];
649
650 for (grammar_str, name) in additional_grammars {
651 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
652 Ok(syntax) => {
653 builder.add(syntax);
654 tracing::debug!("Loaded embedded {} grammar", name);
655 }
656 Err(e) => {
657 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
658 }
659 }
660 }
661 }
662
663 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
669 let entry = self.find_by_path(path, None)?;
670 entry
671 .engines
672 .syntect
673 .map(|i| &self.syntax_set.syntaxes()[i])
674 }
675
676 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
684 if let Some(entry) = self.find_by_name(name) {
685 if let Some(idx) = entry.engines.syntect {
686 return Some(&self.syntax_set.syntaxes()[idx]);
687 }
688 }
689 self.syntax_set.find_syntax_by_name(name)
693 }
694
/// Returns the built-in `(alias, full grammar name)` pairs, in declaration
/// order.
fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
    const BUILT_IN: &[(&str, &str)] = &[
        ("bash", "Bourne Again Shell (bash)"),
        ("shell", "Bourne Again Shell (bash)"),
        ("sh", "Bourne Again Shell (bash)"),
        ("c++", "C++"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("objc", "Objective-C"),
        ("objcpp", "Objective-C++"),
        ("regex", "Regular Expressions (Python)"),
        ("regexp", "Regular Expressions (Python)"),
        ("proto", "Protocol Buffers"),
        ("protobuf", "Protocol Buffers"),
        ("gomod", "Go Module"),
        ("git-rebase", "Git Rebase Todo"),
        ("git-commit", "Git Commit Message"),
        ("git-config", "Git Config"),
        ("git-attributes", "Git Attributes"),
        ("gitignore", "Gitignore"),
        ("fsharp", "FSharp"),
        ("f#", "FSharp"),
        ("terraform", "HCL"),
        ("tf", "HCL"),
        ("ts", "TypeScript"),
        ("js", "JavaScript"),
        ("py", "Python"),
        ("rb", "Ruby"),
        ("rs", "Rust"),
        ("md", "Markdown"),
        ("yml", "YAML"),
        ("dockerfile", "Dockerfile"),
    ];
    BUILT_IN.to_vec()
}
739
740 pub(crate) fn populate_built_in_aliases(&mut self) {
747 for (short, full) in Self::built_in_aliases() {
748 self.register_alias_inner(short, full, true);
749 }
750 self.rebuild_catalog();
751 }
752
753 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
763 if !self.register_alias_inner(short_name, full_name, false) {
764 return false;
765 }
766 let short_lower = short_name.to_lowercase();
767 let full_lower = full_name.to_lowercase();
768 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
769 self.catalog_by_name
770 .entry(short_lower.clone())
771 .or_insert(idx);
772 let entry = &mut self.catalog[idx];
773 let replace = match &entry.short_name {
774 None => true,
775 Some(existing) => short_name.len() < existing.len(),
776 };
777 if replace {
778 entry.short_name = Some(short_lower);
779 }
780 }
781 true
782 }
783
784 fn register_alias_inner(
785 &mut self,
786 short_name: &str,
787 full_name: &str,
788 is_built_in: bool,
789 ) -> bool {
790 let short_lower = short_name.to_lowercase();
791
792 let target_exists = self
794 .syntax_set
795 .syntaxes()
796 .iter()
797 .any(|s| s.name.eq_ignore_ascii_case(full_name));
798 if !target_exists {
799 if tree_sitter_for_syntect_name(full_name).is_some() {
803 return false;
804 }
805 if is_built_in {
806 tracing::warn!(
809 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
810 short_name, full_name
811 );
812 } else {
813 tracing::warn!(
814 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
815 short_name,
816 full_name
817 );
818 }
819 return false;
820 }
821
822 let collides_with_full_name = self
824 .syntax_set
825 .syntaxes()
826 .iter()
827 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
828 if collides_with_full_name {
829 tracing::debug!(
833 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
834 short_name
835 );
836 return false;
837 }
838
839 if let Some(existing_target) = self.aliases.get(&short_lower) {
841 if existing_target.eq_ignore_ascii_case(full_name) {
842 return true;
844 }
845 let msg = format!(
846 "Alias '{}' already maps to '{}', cannot remap to '{}'",
847 short_name, existing_target, full_name
848 );
849 if is_built_in {
850 panic!("[grammar-alias] Built-in alias collision: {}", msg);
851 } else {
852 tracing::warn!("[grammar-alias] {}", msg);
853 return false;
854 }
855 }
856
857 let exact_name = self
859 .syntax_set
860 .syntaxes()
861 .iter()
862 .find(|s| s.name.eq_ignore_ascii_case(full_name))
863 .map(|s| s.name.clone())
864 .unwrap();
865
866 self.aliases.insert(short_lower, exact_name);
867 true
868 }
869
870 pub(crate) fn rebuild_catalog(&mut self) {
885 let mut short_by_full: HashMap<String, String> = HashMap::new();
892 let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
893 let key = full.to_lowercase();
894 let keep = match map.get(&key) {
895 None => true,
896 Some(existing) => short.len() < existing.len(),
897 };
898 if keep {
899 map.insert(key, short.to_string());
900 }
901 };
902 for (short, full) in Self::built_in_aliases() {
903 record(&mut short_by_full, short, full);
904 }
905 for (short, full) in &self.aliases {
906 record(&mut short_by_full, short, full);
907 }
908
909 let derive_language_id =
910 |display_name: &str| -> (String, Option<fresh_languages::Language>) {
911 let ts = tree_sitter_for_syntect_name(display_name);
912 let id = ts
913 .map(|l| l.id().to_string())
914 .unwrap_or_else(|| display_name.to_lowercase());
915 (id, ts)
916 };
917
918 let mut catalog: Vec<GrammarEntry> = Vec::new();
919 let mut scope_to_index: HashMap<String, usize> = HashMap::new();
920
921 for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
931 if syntax.name == "Plain Text" {
932 continue;
933 }
934 let (language_id, tree_sitter) = derive_language_id(&syntax.name);
935 let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
936 let source = self
937 .grammar_sources
938 .get(&syntax.name)
939 .map(|info| info.source.clone())
940 .unwrap_or(GrammarSource::BuiltIn);
941 let entry_index = catalog.len();
942 scope_to_index.insert(syntax.scope.to_string(), entry_index);
943
944 let mut extensions = syntax.file_extensions.clone();
950 if let Some(lang) = tree_sitter {
951 for ext in lang.extensions() {
952 let ext = ext.to_string();
953 if !extensions.iter().any(|e| e == &ext) {
954 extensions.push(ext);
955 }
956 }
957 }
958
959 catalog.push(GrammarEntry {
960 display_name: syntax.name.clone(),
961 language_id,
962 short_name,
963 extensions,
964 filenames: Vec::new(),
965 filename_globs: Vec::new(),
966 source,
967 engines: GrammarEngines {
968 syntect: Some(idx),
969 tree_sitter,
970 },
971 });
972 }
973
974 for (filename, scope) in &self.filename_scopes {
976 if let Some(&idx) = scope_to_index.get(scope) {
977 if !catalog[idx].filenames.iter().any(|f| f == filename) {
978 catalog[idx].filenames.push(filename.clone());
979 }
980 }
981 }
982
983 for (ext, scope) in &self.user_extensions {
985 if let Some(&idx) = scope_to_index.get(scope) {
986 if !catalog[idx].extensions.iter().any(|e| e == ext) {
987 catalog[idx].extensions.push(ext.clone());
988 }
989 }
990 }
991
992 let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
997 std::collections::HashSet::new();
998 for entry in &catalog {
999 if let Some(lang) = entry.engines.tree_sitter {
1000 ts_covered.insert(lang);
1001 }
1002 }
1003 for lang in fresh_languages::Language::all() {
1004 if ts_covered.contains(lang) {
1005 continue;
1006 }
1007 let display_name = lang.display_name().to_string();
1008 let language_id = lang.id().to_string();
1009 let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1010 let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1011 catalog.push(GrammarEntry {
1012 display_name,
1013 language_id,
1014 short_name,
1015 extensions,
1016 filenames: Vec::new(),
1017 filename_globs: Vec::new(),
1018 source: GrammarSource::BuiltIn,
1019 engines: GrammarEngines {
1020 syntect: None,
1021 tree_sitter: Some(*lang),
1022 },
1023 });
1024 }
1025
1026 let mut by_name: HashMap<String, usize> = HashMap::new();
1034 let mut by_extension: HashMap<String, usize> = HashMap::new();
1035 let mut by_filename: HashMap<String, usize> = HashMap::new();
1036 for (idx, entry) in catalog.iter().enumerate() {
1037 by_name.insert(entry.display_name.to_lowercase(), idx);
1038 by_name.insert(entry.language_id.to_lowercase(), idx);
1039 if let Some(short) = &entry.short_name {
1040 by_name.insert(short.to_lowercase(), idx);
1041 }
1042 for ext in &entry.extensions {
1043 by_extension.entry(ext.to_lowercase()).or_insert(idx);
1044 by_filename.entry(ext.clone()).or_insert(idx);
1045 }
1046 for filename in &entry.filenames {
1047 by_filename.entry(filename.clone()).or_insert(idx);
1048 }
1049 }
1050
1051 self.catalog = catalog;
1052 self.catalog_by_name = by_name;
1053 self.catalog_by_extension = by_extension;
1054 self.catalog_by_filename = by_filename;
1055
1056 if !self.applied_language_config.is_empty() {
1060 let cfg = std::mem::take(&mut self.applied_language_config);
1061 self.apply_language_config_inner(&cfg);
1062 self.applied_language_config = cfg;
1063 }
1064 }
1065
1066 pub fn catalog(&self) -> &[GrammarEntry] {
1068 &self.catalog
1069 }
1070
1071 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1077 self.catalog_by_name
1078 .get(&name.to_lowercase())
1079 .map(|&idx| &self.catalog[idx])
1080 }
1081
1082 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1103 let filename = path.file_name().and_then(|n| n.to_str());
1104 let path_str = path.to_str().unwrap_or("");
1105
1106 if let Some(name) = filename {
1107 if let Some(&idx) = self.catalog_by_filename.get(name) {
1108 return Some(&self.catalog[idx]);
1109 }
1110 }
1111
1112 if let Some(name) = filename {
1114 for entry in &self.catalog {
1115 for pattern in &entry.filename_globs {
1116 let matched = if is_path_pattern(pattern) {
1117 path_glob_matches(pattern, path_str)
1118 } else {
1119 filename_glob_matches(pattern, name)
1120 };
1121 if matched {
1122 return Some(entry);
1123 }
1124 }
1125 }
1126 }
1127
1128 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1129 if let Some(entry) = self.find_by_extension(ext) {
1130 return Some(entry);
1131 }
1132 }
1133
1134 let line = first_line?;
1139 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1140 self.find_by_name(&syntax.name)
1141 }
1142
1143 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1145 self.catalog_by_extension
1146 .get(&ext.to_lowercase())
1147 .map(|&idx| &self.catalog[idx])
1148 }
1149
1150 pub fn apply_language_config(
1163 &mut self,
1164 languages: &HashMap<String, crate::config::LanguageConfig>,
1165 ) {
1166 self.applied_language_config = languages.clone();
1167 self.apply_language_config_inner(languages);
1168 }
1169
1170 fn apply_language_config_inner(
1175 &mut self,
1176 languages: &HashMap<String, crate::config::LanguageConfig>,
1177 ) {
1178 for (lang_id, lang_cfg) in languages {
1179 let grammar_name = if lang_cfg.grammar.is_empty() {
1180 lang_id.as_str()
1181 } else {
1182 lang_cfg.grammar.as_str()
1183 };
1184
1185 let idx = self
1187 .catalog_by_name
1188 .get(&grammar_name.to_lowercase())
1189 .copied()
1190 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1191 .unwrap_or_else(|| {
1192 let idx = self.catalog.len();
1193 self.catalog.push(GrammarEntry {
1194 display_name: lang_id.clone(),
1195 language_id: lang_id.clone(),
1196 short_name: None,
1197 extensions: Vec::new(),
1198 filenames: Vec::new(),
1199 filename_globs: Vec::new(),
1200 source: GrammarSource::BuiltIn,
1201 engines: GrammarEngines::default(),
1202 });
1203 idx
1204 });
1205
1206 self.catalog_by_name
1211 .entry(lang_id.to_lowercase())
1212 .or_insert(idx);
1213
1214 for ext in &lang_cfg.extensions {
1215 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1216 self.catalog[idx].extensions.push(ext.clone());
1217 }
1218 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1220 }
1221 for filename in &lang_cfg.filenames {
1222 if is_glob_pattern(filename) {
1223 if !self.catalog[idx]
1224 .filename_globs
1225 .iter()
1226 .any(|f| f == filename)
1227 {
1228 self.catalog[idx].filename_globs.push(filename.clone());
1229 }
1230 } else {
1231 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1232 self.catalog[idx].filenames.push(filename.clone());
1233 }
1234 self.catalog_by_filename.insert(filename.clone(), idx);
1235 }
1236 }
1237 }
1238 }
1239
/// Borrows the shared syntect syntax set.
pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
    &self.syntax_set
}
1244
1245 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1247 Arc::clone(&self.syntax_set)
1248 }
1249
1250 pub fn available_syntaxes(&self) -> Vec<&str> {
1252 self.syntax_set
1253 .syntaxes()
1254 .iter()
1255 .map(|s| s.name.as_str())
1256 .collect()
1257 }
1258
1259 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1266 let mut result: Vec<GrammarInfo> = self
1267 .catalog
1268 .iter()
1269 .map(|entry| GrammarInfo {
1270 name: entry.display_name.clone(),
1271 source: entry.source.clone(),
1272 file_extensions: entry.extensions.clone(),
1273 short_name: entry.short_name.clone(),
1274 })
1275 .collect();
1276 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1277 result
1278 }
1279
/// Borrows the provenance map (syntax name -> `GrammarInfo`).
pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
    &self.grammar_sources
}
1284
1285 pub(crate) fn build_grammar_sources_from_syntax_set(
1289 syntax_set: &SyntaxSet,
1290 ) -> HashMap<String, GrammarInfo> {
1291 let mut sources = HashMap::new();
1292 for syntax in syntax_set.syntaxes() {
1293 sources.insert(
1294 syntax.name.clone(),
1295 GrammarInfo {
1296 name: syntax.name.clone(),
1297 source: GrammarSource::BuiltIn,
1298 file_extensions: syntax.file_extensions.clone(),
1299 short_name: None,
1300 },
1301 );
1302 }
1303 sources
1304 }
1305
/// Test-only accessor for the extra extension -> scope mappings.
#[cfg(test)]
pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
    &self.user_extensions
}
1311
/// Test-only accessor for the specs of additionally loaded grammar files.
#[cfg(test)]
pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
    self.loaded_grammar_paths.as_slice()
}
1317
1318 pub fn with_additional_grammars(
1332 base: &GrammarRegistry,
1333 additional: &[GrammarSpec],
1334 ) -> Option<Self> {
1335 tracing::info!(
1336 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1337 additional.len(),
1338 base.syntax_set.syntaxes().len()
1339 );
1340
1341 let mut builder = (*base.syntax_set).clone().into_builder();
1345
1346 let mut user_extensions = base.user_extensions.clone();
1348
1349 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1351
1352 let mut grammar_sources = base.grammar_sources.clone();
1354
1355 for spec in additional {
1357 tracing::info!(
1358 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1359 spec.language,
1360 spec.path,
1361 spec.extensions
1362 );
1363 match Self::load_grammar_file(&spec.path) {
1364 Ok(syntax) => {
1365 let scope = syntax.scope.to_string();
1366 let syntax_name = syntax.name.clone();
1367 tracing::info!(
1368 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1369 syntax_name,
1370 scope
1371 );
1372 builder.add(syntax);
1373 tracing::info!(
1374 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1375 spec.language,
1376 spec.path,
1377 spec.extensions
1378 );
1379 for ext in &spec.extensions {
1381 user_extensions.insert(ext.clone(), scope.clone());
1382 }
1383 grammar_sources.insert(
1385 syntax_name.clone(),
1386 GrammarInfo {
1387 name: syntax_name,
1388 source: GrammarSource::Plugin {
1389 plugin: spec.language.clone(),
1390 path: spec.path.clone(),
1391 },
1392 file_extensions: spec.extensions.clone(),
1393 short_name: None,
1394 },
1395 );
1396 loaded_grammar_paths.push(spec.clone());
1398 }
1399 Err(e) => {
1400 tracing::warn!(
1401 "Failed to load grammar for '{}' from {:?}: {}",
1402 spec.language,
1403 spec.path,
1404 e
1405 );
1406 }
1407 }
1408 }
1409
1410 let mut reg = Self {
1411 syntax_set: Arc::new(builder.build()),
1412 user_extensions,
1413 filename_scopes: base.filename_scopes.clone(),
1414 loaded_grammar_paths,
1415 grammar_sources,
1416 aliases: base.aliases.clone(),
1417 catalog: Vec::new(),
1418 catalog_by_name: HashMap::new(),
1419 catalog_by_extension: HashMap::new(),
1420 catalog_by_filename: HashMap::new(),
1421 applied_language_config: HashMap::new(),
1422 };
1423 reg.rebuild_catalog();
1424 Some(reg)
1425 }
1426
1427 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1433 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1434
1435 match ext {
1436 "sublime-syntax" => {
1437 let content = std::fs::read_to_string(path)
1438 .map_err(|e| format!("Failed to read file: {}", e))?;
1439 SyntaxDefinition::load_from_str(
1440 &content,
1441 true,
1442 path.file_stem().and_then(|s| s.to_str()),
1443 )
1444 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1445 }
1446 _ => Err(format!(
1447 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1448 ext
1449 )),
1450 }
1451 }
1452}
1453
1454impl Default for GrammarRegistry {
1455 fn default() -> Self {
1456 let defaults = SyntaxSet::load_defaults_newlines();
1458 let mut builder = defaults.into_builder();
1459 Self::add_embedded_grammars(&mut builder);
1460 let syntax_set = builder.build();
1461 let filename_scopes = Self::build_filename_scopes();
1462 let extra_extensions = Self::build_extra_extensions();
1463
1464 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1465 registry.populate_built_in_aliases();
1466 registry.rebuild_catalog();
1467 registry
1468 }
1469}
1470
/// Deserialized package manifest; only the `contributes` section is modelled.
///
/// NOTE(review): the `contributes` / `languages` / `grammars` / `scopeName`
/// layout looks like a VS Code-style `package.json` — confirm against the loader.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// Optional `contributes` section; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1478
/// The `contributes` section of a [`PackageManifest`].
///
/// Both lists default to empty when their key is missing from the input.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations contributed by the package.
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations contributed by the package.
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1486
/// A single language entry inside [`Contributes::languages`].
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Identifier of the language (required).
    pub id: String,
    /// File extensions declared for the language; empty when the key is absent.
    // NOTE(review): whether entries carry a leading dot is not visible here — confirm.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1493
/// A single grammar entry inside [`Contributes::grammars`]. All fields are required.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language this grammar applies to.
    pub language: String,
    /// Scope name of the grammar; read from the `scopeName` key of the input.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file, kept as the raw string from the manifest.
    // NOTE(review): presumably relative to the manifest's directory — confirm with the caller.
    pub path: String,
}
1501
1502#[cfg(test)]
1503mod tests {
1504 use super::*;
1505
/// Even a registry built with `empty()` must expose at least one syntax.
#[test]
fn test_empty_registry() {
    let registry = GrammarRegistry::empty();
    let syntax_count = registry.available_syntaxes().len();
    assert!(syntax_count > 0);
}
1512
/// The default registry must expose at least one syntax.
#[test]
fn test_default_registry() {
    let registry = GrammarRegistry::default();
    let syntax_count = registry.available_syntaxes().len();
    assert!(syntax_count > 0);
}
1519
/// File-based lookup finds a syntax for common extensions and finds nothing
/// for an unknown one.
#[test]
fn test_find_syntax_for_common_extensions() {
    let registry = GrammarRegistry::default();

    // (file name, whether a syntax is expected to be found)
    let expectations = [
        ("test.py", true),
        ("test.rs", true),
        ("test.js", true),
        ("test.json", true),
        ("test.md", true),
        ("test.html", true),
        ("test.css", true),
        ("test.unknown_extension_xyz", false),
    ];

    for (filename, should_exist) in expectations {
        let found = registry
            .find_syntax_for_file(Path::new(filename))
            .is_some();
        assert_eq!(
            found, should_exist,
            "Expected {:?} for {}",
            should_exist, filename
        );
    }
}
1548
/// Two handles obtained from `syntax_set_arc` must point at the same allocation.
#[test]
fn test_syntax_set_arc() {
    let registry = GrammarRegistry::default();
    let first = registry.syntax_set_arc();
    let second = registry.syntax_set_arc();
    let same_allocation = Arc::ptr_eq(&first, &second);
    assert!(same_allocation);
}
1557
/// Shell dotfiles without an extension must resolve to a syntax whose name
/// mentions "bash" or "shell".
#[test]
fn test_shell_dotfiles_detection() {
    let registry = GrammarRegistry::default();

    for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));

        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
1584
/// `PKGBUILD` and `APKBUILD` must resolve to a syntax whose name mentions
/// "bash" or "shell".
#[test]
fn test_pkgbuild_detection() {
    let registry = GrammarRegistry::default();
    let build_scripts = ["PKGBUILD", "APKBUILD"];

    for filename in build_scripts {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));

        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
1609
/// Filename glob patterns from user language config (`*.conf`, `*rc`) must be
/// honoured by `find_by_path`.
#[test]
fn test_find_syntax_with_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Same field values as a hand-written literal: the shared helper supplies
    // the defaults and only `comment_prefix` is overridden.
    let shell_configs = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["*.conf", "*rc"])
    };
    let languages =
        std::collections::HashMap::from([("shell-configs".to_string(), shell_configs)]);
    registry.apply_language_config(&languages);

    let conf_match = registry.find_by_path(Path::new("nftables.conf"), None);
    assert!(conf_match.is_some(), "*.conf should match nftables.conf");

    let rc_match = registry.find_by_path(Path::new("lfrc"), None);
    assert!(rc_match.is_some(), "*rc should match lfrc");

    // The result for a non-matching name is deliberately not asserted; the
    // call only has to complete.
    let _ = registry.find_by_path(Path::new("randomfile"), None);
}
1653
/// A path-style glob (`/etc/**/rc.*`) from user language config must match
/// files directly under `/etc` as well as in nested directories.
#[test]
fn test_find_syntax_with_path_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Same field values as a hand-written literal: the shared helper supplies
    // the defaults and only `comment_prefix` is overridden.
    let shell_configs = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["/etc/**/rc.*"])
    };
    let languages =
        std::collections::HashMap::from([("shell-configs".to_string(), shell_configs)]);
    registry.apply_language_config(&languages);

    let direct = registry.find_by_path(Path::new("/etc/rc.conf"), None);
    assert!(direct.is_some(), "/etc/**/rc.* should match /etc/rc.conf");

    let nested = registry.find_by_path(Path::new("/etc/init/rc.local"), None);
    assert!(
        nested.is_some(),
        "/etc/**/rc.* should match /etc/init/rc.local"
    );

    // The result for a path outside /etc is deliberately not asserted.
    let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
}
1698
/// When one config entry names `lfrc` exactly and another matches it through
/// the `*rc` glob, the exact filename must win.
#[test]
fn test_exact_filename_takes_priority_over_glob() {
    let mut registry = GrammarRegistry::default();

    // Exact filename, mapped to the python grammar.
    let exact = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("python", &[], &["lfrc"])
    };
    // Glob that also matches `lfrc`, mapped to the bash grammar.
    let glob = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &[], &["*rc"])
    };

    let languages = std::collections::HashMap::from([
        ("custom-lfrc".to_string(), exact),
        ("rc-files".to_string(), glob),
    ]);
    registry.apply_language_config(&languages);

    let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
    let resolved_to_python = entry.display_name.to_lowercase().contains("python");
    assert!(
        resolved_to_python,
        "exact match should win over glob, got: {}",
        entry.display_name
    );
}
1766
/// Each built-in short alias must resolve to the expected canonical syntax name.
#[test]
fn test_built_in_aliases_resolve() {
    let registry = GrammarRegistry::default();

    // (alias, canonical syntax name)
    let aliases = [
        ("bash", "Bourne Again Shell (bash)"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("sh", "Bourne Again Shell (bash)"),
        ("proto", "Protocol Buffers"),
    ];

    for (alias, canonical) in aliases {
        let syntax = registry.find_syntax_by_name(alias);
        assert!(syntax.is_some(), "alias '{}' should resolve", alias);
        assert_eq!(syntax.unwrap().name, canonical);
    }
}
1796
/// Alias lookup must ignore the letter case of the requested name.
#[test]
fn test_alias_case_insensitive_input() {
    let registry = GrammarRegistry::default();

    // (alias as typed, canonical syntax name)
    let queries = [("BASH", "Bourne Again Shell (bash)"), ("Cpp", "C++")];

    for (typed, canonical) in queries {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(
            syntax.is_some(),
            "alias '{}' should resolve case-insensitively",
            typed
        );
        assert_eq!(syntax.unwrap().name, canonical);
    }
}
1816
/// The full syntax name must keep resolving, both verbatim and lower-cased.
#[test]
fn test_full_name_still_works() {
    let registry = GrammarRegistry::default();

    // (name as typed, failure message when it does not resolve)
    let queries = [
        (
            "Bourne Again Shell (bash)",
            "full name should still resolve",
        ),
        (
            "bourne again shell (bash)",
            "case-insensitive full name should resolve",
        ),
    ];

    for (typed, reason) in queries {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(syntax.is_some(), "{}", reason);
        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
    }
}
1834
/// Lower-cased grammar names ("rust", "go") must resolve to the grammar of
/// that name.
#[test]
fn test_alias_does_not_shadow_full_names() {
    let registry = GrammarRegistry::default();

    for (typed, canonical) in [("rust", "Rust"), ("go", "Go")] {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(syntax.is_some());
        assert_eq!(syntax.unwrap().name, canonical);
    }
}
1849
/// An alias may be registered, re-registered for the same target, but not
/// re-pointed at a different grammar.
#[test]
fn test_register_alias_rejects_collision() {
    let mut registry = GrammarRegistry::default();

    let first_registration = registry.register_alias("myalias", "Rust");
    assert!(first_registration);

    // Same alias, different target: must be refused.
    let conflicting_registration = registry.register_alias("myalias", "Go");
    assert!(!conflicting_registration);

    // Same alias, same target: accepted again.
    let repeated_registration = registry.register_alias("myalias", "Rust");
    assert!(repeated_registration);
}
1861
/// Registering an alias for a grammar that does not exist must fail.
#[test]
fn test_register_alias_rejects_nonexistent_target() {
    let mut registry = GrammarRegistry::default();
    let accepted = registry.register_alias("nope", "Nonexistent Grammar");
    assert!(!accepted);
}
1867
/// An alias equal (case-insensitively) to an existing grammar name is not
/// registered, yet the name itself keeps resolving.
#[test]
fn test_register_alias_skips_existing_grammar_name() {
    let mut registry = GrammarRegistry::default();

    let accepted = registry.register_alias("rust", "Rust");
    assert!(!accepted);

    let still_resolves = registry.find_syntax_by_name("rust").is_some();
    assert!(still_resolves);
}
1877
/// The grammar-info listing must contain the bash grammar with short name "sh".
#[test]
fn test_available_grammar_info_includes_short_names() {
    let registry = GrammarRegistry::default();
    let infos = registry.available_grammar_info();

    let bash_info = infos
        .iter()
        .find(|g| g.name == "Bourne Again Shell (bash)")
        .expect("bash grammar should be in the list");

    let short_name = bash_info.short_name.as_deref();
    assert!(
        short_name.is_some(),
        "bash grammar should have a short_name"
    );
    assert_eq!(short_name, Some("sh"));
}
1893
/// Catalog invariants: display names are unique ignoring case, TypeScript is
/// present as a tree-sitter-only entry, and Rust / Python / JavaScript carry
/// both engines and can be found again through their `language_id`.
#[test]
fn test_catalog_contains_each_language_once() {
    let registry = GrammarRegistry::default();

    // No two entries may share a display name (case-insensitively).
    let mut seen = std::collections::HashSet::new();
    for entry in registry.catalog() {
        let first_occurrence = seen.insert(entry.display_name.to_lowercase());
        assert!(
            first_occurrence,
            "duplicate catalog entry for display_name={:?}",
            entry.display_name
        );
    }

    // TypeScript: tree-sitter only, no syntect grammar.
    let ts = registry
        .find_by_name("TypeScript")
        .expect("TypeScript must be in the catalog");
    assert!(ts.engines.syntect.is_none());
    assert_eq!(
        ts.engines.tree_sitter,
        Some(fresh_languages::Language::TypeScript)
    );
    assert_eq!(ts.language_id, "typescript");
    assert!(ts.extensions.iter().any(|e| e == "ts"));

    // Languages backed by both engines.
    let dual_engine = ["Rust", "Python", "JavaScript"];
    for name in dual_engine {
        let entry = registry
            .find_by_name(name)
            .unwrap_or_else(|| panic!("{} must be in the catalog", name));
        assert!(
            entry.engines.syntect.is_some(),
            "{} should have a syntect index",
            name
        );
        assert!(
            entry.engines.tree_sitter.is_some(),
            "{} should also have a tree-sitter language",
            name
        );

        // Looking the entry up by its language id must lead back to it.
        let by_id = registry
            .find_by_name(&entry.language_id)
            .expect("language_id should resolve");
        assert_eq!(by_id.display_name, entry.display_name);
    }
}
1947
/// Catalog lookups by path (`foo.ts`) and by bare extension (`rs`) return the
/// expected languages.
#[test]
fn test_catalog_find_by_path_and_extension() {
    let registry = GrammarRegistry::default();

    let by_path = registry
        .find_by_path(Path::new("foo.ts"), None)
        .expect("foo.ts should resolve");
    assert_eq!(by_path.display_name, "TypeScript");

    let by_extension = registry.find_by_extension("rs").expect("rs should resolve");
    assert_eq!(by_extension.display_name, "Rust");
}
1958
1959 fn lang_cfg(
1961 grammar: &str,
1962 extensions: &[&str],
1963 filenames: &[&str],
1964 ) -> crate::config::LanguageConfig {
1965 crate::config::LanguageConfig {
1966 extensions: extensions.iter().map(|s| s.to_string()).collect(),
1967 filenames: filenames.iter().map(|s| s.to_string()).collect(),
1968 grammar: grammar.to_string(),
1969 comment_prefix: None,
1970 auto_indent: true,
1971 auto_close: None,
1972 auto_surround: None,
1973 textmate_grammar: None,
1974 show_whitespace_tabs: true,
1975 line_wrap: None,
1976 wrap_column: None,
1977 page_view: None,
1978 page_width: None,
1979 use_tabs: None,
1980 tab_size: None,
1981 formatter: None,
1982 format_on_save: false,
1983 on_save: vec![],
1984 word_characters: None,
1985 }
1986 }
1987
/// A language-config key ("mylang") whose grammar is "Rust" must be usable as
/// a name in `find_by_name`.
#[test]
fn test_user_alias_resolves_via_find_by_name() {
    let mut registry = GrammarRegistry::default();
    let languages =
        std::collections::HashMap::from([("mylang".to_string(), lang_cfg("Rust", &[], &[]))]);
    registry.apply_language_config(&languages);

    let resolved = registry
        .find_by_name("mylang")
        .expect("user-declared alias 'mylang' must resolve");
    assert_eq!(resolved.display_name, "Rust");
}
2003
/// Extensions and globs added through language config must still resolve
/// after a later `register_alias` call.
#[test]
fn test_register_alias_preserves_applied_language_config() {
    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "shell-configs".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    )]);
    registry.apply_language_config(&languages);

    // Baseline: both lookups work right after the config is applied.
    let extension_before = registry.find_by_extension("myconf");
    assert!(extension_before.is_some());
    let glob_before = registry.find_by_path(Path::new("foo.myconf"), None);
    assert!(
        glob_before.is_some(),
        "glob should match before register_alias"
    );

    registry.register_alias("mycustom", "Rust");

    // The same lookups must keep working afterwards.
    let extension_after = registry.find_by_extension("myconf");
    assert!(
        extension_after.is_some(),
        "config extension must survive register_alias"
    );
    let glob_after = registry.find_by_path(Path::new("foo.myconf"), None);
    assert!(glob_after.is_some(), "glob must survive register_alias");
}
2040
/// Resolving a syntax by a differently-cased alias ("BASH") must report the
/// canonical display name rather than echoing the text that was typed.
#[test]
fn test_from_syntax_name_preserves_canonical_display_name() {
    use crate::primitives::detected_language::DetectedLanguage;

    let registry = GrammarRegistry::default();
    // No user language config is involved in this lookup.
    let languages = std::collections::HashMap::new();

    // The registry is passed by reference (`&registry`); the source had this
    // argument corrupted into a `®` character, which does not compile.
    let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
        .expect("BASH should resolve via alias");
    assert_eq!(
        detected.display_name, "Bourne Again Shell (bash)",
        "display_name must be canonical, not user-typed"
    );
}
2057
/// A language that exists only in user config ("fish") must get a catalog
/// entry with neither highlighting engine attached.
#[test]
fn test_config_only_language_appears_in_catalog() {
    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "fish".to_string(),
        lang_cfg("fish", &["fish"], &[]),
    )]);
    registry.apply_language_config(&languages);

    let fish = registry
        .find_by_name("fish")
        .expect("fish should be in the catalog after apply_language_config");
    assert!(fish.engines.syntect.is_none());
    assert!(fish.engines.tree_sitter.is_none());
    assert_eq!(fish.language_id, "fish");
    assert!(fish.extensions.iter().any(|e| e == "fish"));
}
2077
/// An extension claimed by user config must take precedence over the built-in
/// owner of that extension.
#[test]
fn test_config_extension_overrides_builtin() {
    let mut registry = GrammarRegistry::default();

    // Before any config is applied, `js` belongs to JavaScript.
    let builtin_owner = registry.find_by_extension("js").unwrap();
    assert_eq!(builtin_owner.display_name, "JavaScript");

    let languages = std::collections::HashMap::from([(
        "ts-overlay".to_string(),
        lang_cfg("TypeScript", &["js"], &[]),
    )]);
    registry.apply_language_config(&languages);

    let configured_owner = registry.find_by_extension("js").unwrap();
    assert_eq!(
        configured_owner.display_name, "TypeScript",
        "user-config extension must win over built-in"
    );
}
2104
/// Well-known extensionless filenames must resolve through `find_by_path` to
/// a grammar whose display name contains the expected word.
#[test]
fn test_bare_filename_resolves_via_find_by_path() {
    let registry = GrammarRegistry::default();

    // (file name, lower-case substring expected in the display name)
    let cases = [
        ("Gemfile", "ruby"),
        ("Rakefile", "ruby"),
        ("Vagrantfile", "ruby"),
        ("Makefile", "makefile"),
        ("GNUmakefile", "makefile"),
    ];

    for (filename, expected_substr) in cases {
        let entry = registry
            .find_by_path(Path::new(filename), None)
            .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));

        let lowered = entry.display_name.to_lowercase();
        assert!(
            lowered.contains(expected_substr),
            "{} should resolve to {} grammar, got {}",
            filename,
            expected_substr,
            entry.display_name
        );
    }
}
2133
/// A `.jsx` file must resolve to the JavaScript catalog entry.
#[test]
fn test_jsx_resolves_to_javascript() {
    let registry = GrammarRegistry::default();
    let jsx_path = Path::new("foo.jsx");

    let resolved = registry
        .find_by_path(jsx_path, None)
        .expect("foo.jsx must resolve");
    assert_eq!(resolved.display_name, "JavaScript");
}
2146
/// An explicit `rebuild_catalog` must not lose extensions or globs that were
/// added through `apply_language_config`.
#[test]
fn test_rebuild_catalog_replays_language_config() {
    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "myshell".to_string(),
        lang_cfg("bash", &["myext"], &["*.myglob"]),
    )]);
    registry.apply_language_config(&languages);

    // Baseline before the rebuild.
    assert!(registry.find_by_extension("myext").is_some());
    assert!(registry
        .find_by_path(Path::new("foo.myglob"), None)
        .is_some());

    registry.rebuild_catalog();

    let extension_after = registry.find_by_extension("myext");
    assert!(
        extension_after.is_some(),
        "rebuild_catalog must replay applied user config"
    );
    let glob_after = registry.find_by_path(Path::new("foo.myglob"), None);
    assert!(
        glob_after.is_some(),
        "rebuild_catalog must replay user globs"
    );
}
2179
/// Applying the same language config twice must not duplicate the extension
/// or the glob on the target catalog entry.
#[test]
fn test_apply_language_config_idempotent() {
    /// Counts how often `myconf` / `*.myconf` appear on the entry found for "bash".
    fn occurrences(registry: &GrammarRegistry) -> (usize, usize) {
        let extension_count = registry
            .find_by_name("bash")
            .unwrap()
            .extensions
            .iter()
            .filter(|e| e == &"myconf")
            .count();
        let glob_count = registry
            .find_by_name("bash")
            .unwrap()
            .filename_globs
            .iter()
            .filter(|g| g == &"*.myconf")
            .count();
        (extension_count, glob_count)
    }

    let mut registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::from([(
        "shell-cfg".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    )]);

    registry.apply_language_config(&languages);
    let (first_extensions, first_globs) = occurrences(&registry);
    assert_eq!(first_extensions, 1);
    assert_eq!(first_globs, 1);

    // Second application of the identical config.
    registry.apply_language_config(&languages);
    let (second_extensions, second_globs) = occurrences(&registry);
    assert_eq!(second_extensions, 1, "extensions must not duplicate");
    assert_eq!(second_globs, 1, "globs must not duplicate");
}
2228
/// `tree_sitter_for_syntect_name` maps known syntect display names to a
/// tree-sitter language and returns `None` for the others.
#[test]
fn test_tree_sitter_bridge() {
    // (syntect display name, expected tree-sitter language)
    let cases = [
        (
            "Bourne Again Shell (bash)",
            Some(fresh_languages::Language::Bash),
        ),
        ("Rust", Some(fresh_languages::Language::Rust)),
        ("Nushell", None),
        ("does-not-exist", None),
    ];

    for (display_name, expected) in cases {
        assert_eq!(tree_sitter_for_syntect_name(display_name), expected);
    }
}
2247}