1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file to load and what it should be registered for.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language label for the grammar. `with_additional_grammars` records it
    /// as the plugin name in the grammar's `GrammarSource::Plugin` entry.
    pub language: String,
    /// Path of the grammar file that gets loaded.
    pub path: PathBuf,
    /// File extensions that are mapped to the grammar's scope once loaded.
    pub extensions: Vec<String>,
}
30
/// Origin of a grammar.
///
/// Serialized as an internally tagged enum: the `"type"` field carries the
/// kebab-case name given by each variant's `rename` attribute.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Built-in grammar; also the fallback used by the catalog when no other
    /// source has been recorded for a syntax.
    #[serde(rename = "built-in")]
    BuiltIn,
    /// User-provided grammar (presumably `path` is the grammar file — confirm
    /// against the loader that produces this variant).
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Grammar that belongs to a named language pack.
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Grammar that belongs to a named bundle.
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Grammar contributed by a plugin. `with_additional_grammars` stores the
    /// spec's language label in `plugin` and the grammar file path in `path`.
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of a grammar: its name, origin and file extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name (the syntax name when built from a `SyntaxSet`).
    pub name: String,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias; omitted from serialized output when `None` and
    /// defaulted to `None` when missing on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit syntect-name → tree-sitter-language pairs, consulted by
/// `tree_sitter_for_syntect_name` before it falls back to comparing the
/// tree-sitter language's own display name.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// Highlighting engines available for a catalog entry. Either side may be
/// absent; the default has neither.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index of the grammar within the registry's `SyntaxSet::syntaxes()`
    /// slice, when a syntect grammar exists.
    pub syntect: Option<usize>,
    /// Matching tree-sitter language, when one exists.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One language in the registry's catalog.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Display name: the syntect syntax name, the tree-sitter display name,
    /// or the language id for entries created from language config.
    pub display_name: String,
    /// Language id: the tree-sitter id when a tree-sitter language matches,
    /// otherwise the lowercased display name (or the configured language id).
    pub language_id: String,
    /// Shortest alias known for this grammar, if any.
    pub short_name: Option<String>,
    /// File extensions that resolve to this entry.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (filename or path patterns) that resolve to this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Engines that can handle this entry.
    pub engines: GrammarEngines,
}
142
// Embedded grammar sources. Each constant holds the text of a
// `.sublime-syntax` file pulled into the binary at compile time with
// `include_str!`; `add_embedded_grammars` parses them into a builder.

/// TOML grammar source.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Odin grammar source.
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Zig grammar source.
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Git rebase todo grammar source.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Git commit message grammar source.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Gitignore grammar source.
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Git config grammar source.
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Git attributes grammar source.
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Typst grammar source.
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Dockerfile grammar source.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// INI grammar source.
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// CMake grammar source.
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// SCSS grammar source.
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// LESS grammar source.
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// PowerShell grammar source.
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Kotlin grammar source.
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Swift grammar source.
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Dart grammar source.
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Elixir grammar source.
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// F# grammar source.
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Nix grammar source.
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// HCL grammar source.
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Protocol Buffers grammar source.
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// GraphQL grammar source.
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Julia grammar source.
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Nim grammar source.
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Gleam grammar source.
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// V grammar source.
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Solidity grammar source.
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// KDL grammar source.
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Nushell grammar source.
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Starlark grammar source.
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Justfile grammar source.
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Earthfile grammar source.
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Go module grammar source.
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Vue grammar source.
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Svelte grammar source.
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Astro grammar source.
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Hyprlang grammar source.
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// AutoHotkey grammar source.
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
235
236impl std::fmt::Debug for GrammarRegistry {
241 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
242 f.debug_struct("GrammarRegistry")
243 .field("syntax_count", &self.syntax_set.syntaxes().len())
244 .finish()
245 }
246}
247
/// Registry of syntax grammars plus a catalog of languages with lookup
/// indexes by name, extension and filename.
pub struct GrammarRegistry {
    /// The syntect syntax set shared behind an `Arc`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra file extension → scope mappings (e.g. `cjs` → `source.js`).
    user_extensions: HashMap<String, String>,
    /// Exact filename → scope mappings (e.g. `Dockerfile` → `source.dockerfile`).
    filename_scopes: HashMap<String, String>,
    /// Specs of grammars that were loaded from files.
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Grammar info keyed by syntax name; supplies each catalog entry's source.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lowercased alias → exact grammar name.
    aliases: HashMap<String, String>,
    /// All known languages; the index maps below point into this vector.
    catalog: Vec<GrammarEntry>,
    /// Lowercased display name / language id / short name → catalog index.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased file extension → catalog index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact (case-sensitive) filename → catalog index.
    catalog_by_filename: HashMap<String, usize>,
    /// Last config given to `apply_language_config`; `rebuild_catalog`
    /// re-applies it after rebuilding so the configuration is not lost.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
}
280
281impl GrammarRegistry {
282 pub(crate) fn new(
287 syntax_set: SyntaxSet,
288 user_extensions: HashMap<String, String>,
289 filename_scopes: HashMap<String, String>,
290 ) -> Self {
291 Self::new_with_loaded_paths(
292 syntax_set,
293 user_extensions,
294 filename_scopes,
295 Vec::new(),
296 HashMap::new(),
297 )
298 }
299
300 pub(crate) fn new_with_loaded_paths(
305 syntax_set: SyntaxSet,
306 user_extensions: HashMap<String, String>,
307 filename_scopes: HashMap<String, String>,
308 loaded_grammar_paths: Vec<GrammarSpec>,
309 grammar_sources: HashMap<String, GrammarInfo>,
310 ) -> Self {
311 let mut reg = Self {
312 syntax_set: Arc::new(syntax_set),
313 user_extensions,
314 filename_scopes,
315 loaded_grammar_paths,
316 grammar_sources,
317 aliases: HashMap::new(),
318 catalog: Vec::new(),
319 catalog_by_name: HashMap::new(),
320 catalog_by_extension: HashMap::new(),
321 catalog_by_filename: HashMap::new(),
322 applied_language_config: HashMap::new(),
323 };
324 reg.rebuild_catalog();
325 reg
326 }
327
328 pub fn empty() -> Arc<Self> {
330 let mut builder = SyntaxSetBuilder::new();
331 builder.add_plain_text_syntax();
332 let mut reg = Self {
333 syntax_set: Arc::new(builder.build()),
334 user_extensions: HashMap::new(),
335 filename_scopes: HashMap::new(),
336 loaded_grammar_paths: Vec::new(),
337 grammar_sources: HashMap::new(),
338 aliases: HashMap::new(),
339 catalog: Vec::new(),
340 catalog_by_name: HashMap::new(),
341 catalog_by_extension: HashMap::new(),
342 catalog_by_filename: HashMap::new(),
343 applied_language_config: HashMap::new(),
344 };
345 reg.rebuild_catalog();
346 Arc::new(reg)
347 }
348
349 pub fn defaults_only() -> Arc<Self> {
356 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
360 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
361 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
362 ))
363 .expect("Failed to load pre-compiled syntax packdump");
364 tracing::info!(
365 "defaults_only: loaded ({} syntaxes)",
366 syntax_set.syntaxes().len()
367 );
368 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
369 let filename_scopes = Self::build_filename_scopes();
370 let extra_extensions = Self::build_extra_extensions();
371 let mut registry = Self {
372 syntax_set: Arc::new(syntax_set),
373 user_extensions: extra_extensions,
374 filename_scopes,
375 loaded_grammar_paths: Vec::new(),
376 grammar_sources,
377 aliases: HashMap::new(),
378 catalog: Vec::new(),
379 catalog_by_name: HashMap::new(),
380 catalog_by_extension: HashMap::new(),
381 catalog_by_filename: HashMap::new(),
382 applied_language_config: HashMap::new(),
383 };
384 registry.populate_built_in_aliases();
385 registry.rebuild_catalog();
386 Arc::new(registry)
387 }
388
389 pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
394 let mut map = HashMap::new();
395
396 let js_scope = "source.js".to_string();
398 map.insert("cjs".to_string(), js_scope.clone());
399 map.insert("mjs".to_string(), js_scope);
400
401 map
405 }
406
407 pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
409 let mut map = HashMap::new();
410
411 let shell_scope = "source.shell.bash".to_string();
413 for filename in [
414 ".zshrc",
415 ".zprofile",
416 ".zshenv",
417 ".zlogin",
418 ".zlogout",
419 ".bash_aliases",
420 "PKGBUILD",
423 "APKBUILD",
424 ] {
425 map.insert(filename.to_string(), shell_scope.clone());
426 }
427
428 let git_rebase_scope = "source.git-rebase-todo".to_string();
430 map.insert("git-rebase-todo".to_string(), git_rebase_scope);
431
432 let git_commit_scope = "source.git-commit".to_string();
434 for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
435 map.insert(filename.to_string(), git_commit_scope.clone());
436 }
437
438 let gitignore_scope = "source.gitignore".to_string();
440 for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
441 map.insert(filename.to_string(), gitignore_scope.clone());
442 }
443
444 let gitconfig_scope = "source.gitconfig".to_string();
446 for filename in [".gitconfig", ".gitmodules"] {
447 map.insert(filename.to_string(), gitconfig_scope.clone());
448 }
449
450 let gitattributes_scope = "source.gitattributes".to_string();
452 map.insert(".gitattributes".to_string(), gitattributes_scope);
453
454 let groovy_scope = "source.groovy".to_string();
456 map.insert("Jenkinsfile".to_string(), groovy_scope);
457
458 let ruby_scope = "source.ruby".to_string();
461 map.insert("Brewfile".to_string(), ruby_scope);
462
463 let dockerfile_scope = "source.dockerfile".to_string();
465 map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
466 map.insert("Containerfile".to_string(), dockerfile_scope.clone());
467 map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
469 map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
470 map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
471 map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
472
473 let cmake_scope = "source.cmake".to_string();
475 map.insert("CMakeLists.txt".to_string(), cmake_scope);
476
477 let starlark_scope = "source.starlark".to_string();
479 map.insert("BUILD".to_string(), starlark_scope.clone());
480 map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
481 map.insert("WORKSPACE".to_string(), starlark_scope.clone());
482 map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
483 map.insert("Tiltfile".to_string(), starlark_scope);
484
485 let justfile_scope = "source.justfile".to_string();
487 map.insert("justfile".to_string(), justfile_scope.clone());
488 map.insert("Justfile".to_string(), justfile_scope.clone());
489 map.insert(".justfile".to_string(), justfile_scope);
490
491 let ini_scope = "source.ini".to_string();
493 map.insert(".editorconfig".to_string(), ini_scope);
494
495 let earthfile_scope = "source.earthfile".to_string();
497 map.insert("Earthfile".to_string(), earthfile_scope);
498
499 let hyprlang_scope = "source.hyprlang".to_string();
501 map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
502 map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
503 map.insert("hyprlock.conf".to_string(), hyprlang_scope);
504
505 let gomod_scope = "source.gomod".to_string();
507 map.insert("go.mod".to_string(), gomod_scope.clone());
508 map.insert("go.sum".to_string(), gomod_scope);
509
510 map
511 }
512
513 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
515 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
517 Ok(syntax) => {
518 builder.add(syntax);
519 tracing::debug!("Loaded embedded TOML grammar");
520 }
521 Err(e) => {
522 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
523 }
524 }
525
526 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
528 Ok(syntax) => {
529 builder.add(syntax);
530 tracing::debug!("Loaded embedded Odin grammar");
531 }
532 Err(e) => {
533 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
534 }
535 }
536
537 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
539 Ok(syntax) => {
540 builder.add(syntax);
541 tracing::debug!("Loaded embedded Zig grammar");
542 }
543 Err(e) => {
544 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
545 }
546 }
547
548 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
550 Ok(syntax) => {
551 builder.add(syntax);
552 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
553 }
554 Err(e) => {
555 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
556 }
557 }
558
559 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
561 {
562 Ok(syntax) => {
563 builder.add(syntax);
564 tracing::debug!("Loaded embedded Git Commit Message grammar");
565 }
566 Err(e) => {
567 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
568 }
569 }
570
571 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
573 Ok(syntax) => {
574 builder.add(syntax);
575 tracing::debug!("Loaded embedded Gitignore grammar");
576 }
577 Err(e) => {
578 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
579 }
580 }
581
582 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
584 Ok(syntax) => {
585 builder.add(syntax);
586 tracing::debug!("Loaded embedded Git Config grammar");
587 }
588 Err(e) => {
589 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
590 }
591 }
592
593 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
595 Ok(syntax) => {
596 builder.add(syntax);
597 tracing::debug!("Loaded embedded Git Attributes grammar");
598 }
599 Err(e) => {
600 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
601 }
602 }
603
604 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
606 Ok(syntax) => {
607 builder.add(syntax);
608 tracing::debug!("Loaded embedded Typst grammar");
609 }
610 Err(e) => {
611 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
612 }
613 }
614
615 let additional_grammars: &[(&str, &str)] = &[
617 (DOCKERFILE_GRAMMAR, "Dockerfile"),
618 (INI_GRAMMAR, "INI"),
619 (CMAKE_GRAMMAR, "CMake"),
620 (SCSS_GRAMMAR, "SCSS"),
621 (LESS_GRAMMAR, "LESS"),
622 (POWERSHELL_GRAMMAR, "PowerShell"),
623 (KOTLIN_GRAMMAR, "Kotlin"),
624 (SWIFT_GRAMMAR, "Swift"),
625 (DART_GRAMMAR, "Dart"),
626 (ELIXIR_GRAMMAR, "Elixir"),
627 (FSHARP_GRAMMAR, "FSharp"),
628 (NIX_GRAMMAR, "Nix"),
629 (HCL_GRAMMAR, "HCL"),
630 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
631 (GRAPHQL_GRAMMAR, "GraphQL"),
632 (JULIA_GRAMMAR, "Julia"),
633 (NIM_GRAMMAR, "Nim"),
634 (GLEAM_GRAMMAR, "Gleam"),
635 (VLANG_GRAMMAR, "V"),
636 (SOLIDITY_GRAMMAR, "Solidity"),
637 (KDL_GRAMMAR, "KDL"),
638 (NUSHELL_GRAMMAR, "Nushell"),
639 (STARLARK_GRAMMAR, "Starlark"),
640 (JUSTFILE_GRAMMAR, "Justfile"),
641 (EARTHFILE_GRAMMAR, "Earthfile"),
642 (GOMOD_GRAMMAR, "Go Module"),
643 (VUE_GRAMMAR, "Vue"),
644 (SVELTE_GRAMMAR, "Svelte"),
645 (ASTRO_GRAMMAR, "Astro"),
646 (HYPRLANG_GRAMMAR, "Hyprlang"),
647 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
648 ];
649
650 for (grammar_str, name) in additional_grammars {
651 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
652 Ok(syntax) => {
653 builder.add(syntax);
654 tracing::debug!("Loaded embedded {} grammar", name);
655 }
656 Err(e) => {
657 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
658 }
659 }
660 }
661 }
662
663 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
671 if let Some(entry) = self.find_by_path(path) {
672 return entry
678 .engines
679 .syntect
680 .map(|i| &self.syntax_set.syntaxes()[i]);
681 }
682 self.syntax_set.find_syntax_for_file(path).ok().flatten()
685 }
686
687 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
695 if let Some(entry) = self.find_by_name(name) {
696 if let Some(idx) = entry.engines.syntect {
697 return Some(&self.syntax_set.syntaxes()[idx]);
698 }
699 }
700 self.syntax_set.find_syntax_by_name(name)
704 }
705
706 fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
715 vec![
716 ("bash", "Bourne Again Shell (bash)"),
718 ("shell", "Bourne Again Shell (bash)"),
719 ("sh", "Bourne Again Shell (bash)"),
720 ("c++", "C++"),
721 ("cpp", "C++"),
722 ("csharp", "C#"),
723 ("objc", "Objective-C"),
724 ("objcpp", "Objective-C++"),
725 ("regex", "Regular Expressions (Python)"),
726 ("regexp", "Regular Expressions (Python)"),
727 ("proto", "Protocol Buffers"),
729 ("protobuf", "Protocol Buffers"),
730 ("gomod", "Go Module"),
731 ("git-rebase", "Git Rebase Todo"),
732 ("git-commit", "Git Commit Message"),
733 ("git-config", "Git Config"),
734 ("git-attributes", "Git Attributes"),
735 ("gitignore", "Gitignore"),
736 ("fsharp", "FSharp"),
737 ("f#", "FSharp"),
738 ("terraform", "HCL"),
739 ("tf", "HCL"),
740 ("ts", "TypeScript"),
741 ("js", "JavaScript"),
742 ("py", "Python"),
743 ("rb", "Ruby"),
744 ("rs", "Rust"),
745 ("md", "Markdown"),
746 ("yml", "YAML"),
747 ("dockerfile", "Dockerfile"),
748 ]
749 }
750
751 pub(crate) fn populate_built_in_aliases(&mut self) {
758 for (short, full) in Self::built_in_aliases() {
759 self.register_alias_inner(short, full, true);
760 }
761 self.rebuild_catalog();
762 }
763
764 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
774 if !self.register_alias_inner(short_name, full_name, false) {
775 return false;
776 }
777 let short_lower = short_name.to_lowercase();
778 let full_lower = full_name.to_lowercase();
779 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
780 self.catalog_by_name
781 .entry(short_lower.clone())
782 .or_insert(idx);
783 let entry = &mut self.catalog[idx];
784 let replace = match &entry.short_name {
785 None => true,
786 Some(existing) => short_name.len() < existing.len(),
787 };
788 if replace {
789 entry.short_name = Some(short_lower);
790 }
791 }
792 true
793 }
794
795 fn register_alias_inner(
796 &mut self,
797 short_name: &str,
798 full_name: &str,
799 is_built_in: bool,
800 ) -> bool {
801 let short_lower = short_name.to_lowercase();
802
803 let target_exists = self
805 .syntax_set
806 .syntaxes()
807 .iter()
808 .any(|s| s.name.eq_ignore_ascii_case(full_name));
809 if !target_exists {
810 if is_built_in {
811 tracing::warn!(
814 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
815 short_name, full_name
816 );
817 } else {
818 tracing::warn!(
819 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
820 short_name,
821 full_name
822 );
823 }
824 return false;
825 }
826
827 let collides_with_full_name = self
829 .syntax_set
830 .syntaxes()
831 .iter()
832 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
833 if collides_with_full_name {
834 tracing::debug!(
838 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
839 short_name
840 );
841 return false;
842 }
843
844 if let Some(existing_target) = self.aliases.get(&short_lower) {
846 if existing_target.eq_ignore_ascii_case(full_name) {
847 return true;
849 }
850 let msg = format!(
851 "Alias '{}' already maps to '{}', cannot remap to '{}'",
852 short_name, existing_target, full_name
853 );
854 if is_built_in {
855 panic!("[grammar-alias] Built-in alias collision: {}", msg);
856 } else {
857 tracing::warn!("[grammar-alias] {}", msg);
858 return false;
859 }
860 }
861
862 let exact_name = self
864 .syntax_set
865 .syntaxes()
866 .iter()
867 .find(|s| s.name.eq_ignore_ascii_case(full_name))
868 .map(|s| s.name.clone())
869 .unwrap();
870
871 self.aliases.insert(short_lower, exact_name);
872 true
873 }
874
875 pub(crate) fn rebuild_catalog(&mut self) {
890 let mut short_by_full: HashMap<String, String> = HashMap::new();
897 let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
898 let key = full.to_lowercase();
899 let keep = match map.get(&key) {
900 None => true,
901 Some(existing) => short.len() < existing.len(),
902 };
903 if keep {
904 map.insert(key, short.to_string());
905 }
906 };
907 for (short, full) in Self::built_in_aliases() {
908 record(&mut short_by_full, short, full);
909 }
910 for (short, full) in &self.aliases {
911 record(&mut short_by_full, short, full);
912 }
913
914 let derive_language_id =
915 |display_name: &str| -> (String, Option<fresh_languages::Language>) {
916 let ts = tree_sitter_for_syntect_name(display_name);
917 let id = ts
918 .map(|l| l.id().to_string())
919 .unwrap_or_else(|| display_name.to_lowercase());
920 (id, ts)
921 };
922
923 let mut catalog: Vec<GrammarEntry> = Vec::new();
924 let mut scope_to_index: HashMap<String, usize> = HashMap::new();
925
926 for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
936 if syntax.name == "Plain Text" {
937 continue;
938 }
939 let (language_id, tree_sitter) = derive_language_id(&syntax.name);
940 let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
941 let source = self
942 .grammar_sources
943 .get(&syntax.name)
944 .map(|info| info.source.clone())
945 .unwrap_or(GrammarSource::BuiltIn);
946 let entry_index = catalog.len();
947 scope_to_index.insert(syntax.scope.to_string(), entry_index);
948
949 let mut extensions = syntax.file_extensions.clone();
955 if let Some(lang) = tree_sitter {
956 for ext in lang.extensions() {
957 let ext = ext.to_string();
958 if !extensions.iter().any(|e| e == &ext) {
959 extensions.push(ext);
960 }
961 }
962 }
963
964 catalog.push(GrammarEntry {
965 display_name: syntax.name.clone(),
966 language_id,
967 short_name,
968 extensions,
969 filenames: Vec::new(),
970 filename_globs: Vec::new(),
971 source,
972 engines: GrammarEngines {
973 syntect: Some(idx),
974 tree_sitter,
975 },
976 });
977 }
978
979 for (filename, scope) in &self.filename_scopes {
981 if let Some(&idx) = scope_to_index.get(scope) {
982 if !catalog[idx].filenames.iter().any(|f| f == filename) {
983 catalog[idx].filenames.push(filename.clone());
984 }
985 }
986 }
987
988 for (ext, scope) in &self.user_extensions {
990 if let Some(&idx) = scope_to_index.get(scope) {
991 if !catalog[idx].extensions.iter().any(|e| e == ext) {
992 catalog[idx].extensions.push(ext.clone());
993 }
994 }
995 }
996
997 let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1002 std::collections::HashSet::new();
1003 for entry in &catalog {
1004 if let Some(lang) = entry.engines.tree_sitter {
1005 ts_covered.insert(lang);
1006 }
1007 }
1008 for lang in fresh_languages::Language::all() {
1009 if ts_covered.contains(lang) {
1010 continue;
1011 }
1012 let display_name = lang.display_name().to_string();
1013 let language_id = lang.id().to_string();
1014 let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1015 let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1016 catalog.push(GrammarEntry {
1017 display_name,
1018 language_id,
1019 short_name,
1020 extensions,
1021 filenames: Vec::new(),
1022 filename_globs: Vec::new(),
1023 source: GrammarSource::BuiltIn,
1024 engines: GrammarEngines {
1025 syntect: None,
1026 tree_sitter: Some(*lang),
1027 },
1028 });
1029 }
1030
1031 let mut by_name: HashMap<String, usize> = HashMap::new();
1039 let mut by_extension: HashMap<String, usize> = HashMap::new();
1040 let mut by_filename: HashMap<String, usize> = HashMap::new();
1041 for (idx, entry) in catalog.iter().enumerate() {
1042 by_name.insert(entry.display_name.to_lowercase(), idx);
1043 by_name.insert(entry.language_id.to_lowercase(), idx);
1044 if let Some(short) = &entry.short_name {
1045 by_name.insert(short.to_lowercase(), idx);
1046 }
1047 for ext in &entry.extensions {
1048 by_extension.entry(ext.to_lowercase()).or_insert(idx);
1049 by_filename.entry(ext.clone()).or_insert(idx);
1050 }
1051 for filename in &entry.filenames {
1052 by_filename.entry(filename.clone()).or_insert(idx);
1053 }
1054 }
1055
1056 self.catalog = catalog;
1057 self.catalog_by_name = by_name;
1058 self.catalog_by_extension = by_extension;
1059 self.catalog_by_filename = by_filename;
1060
1061 if !self.applied_language_config.is_empty() {
1065 let cfg = std::mem::take(&mut self.applied_language_config);
1066 self.apply_language_config_inner(&cfg);
1067 self.applied_language_config = cfg;
1068 }
1069 }
1070
1071 pub fn catalog(&self) -> &[GrammarEntry] {
1073 &self.catalog
1074 }
1075
1076 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1082 self.catalog_by_name
1083 .get(&name.to_lowercase())
1084 .map(|&idx| &self.catalog[idx])
1085 }
1086
1087 pub fn find_by_path(&self, path: &Path) -> Option<&GrammarEntry> {
1097 let filename = path.file_name().and_then(|n| n.to_str());
1098 let path_str = path.to_str().unwrap_or("");
1099
1100 if let Some(name) = filename {
1101 if let Some(&idx) = self.catalog_by_filename.get(name) {
1102 return Some(&self.catalog[idx]);
1103 }
1104 }
1105
1106 if let Some(name) = filename {
1108 for entry in &self.catalog {
1109 for pattern in &entry.filename_globs {
1110 let matched = if is_path_pattern(pattern) {
1111 path_glob_matches(pattern, path_str)
1112 } else {
1113 filename_glob_matches(pattern, name)
1114 };
1115 if matched {
1116 return Some(entry);
1117 }
1118 }
1119 }
1120 }
1121
1122 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1123 return self.find_by_extension(ext);
1124 }
1125 None
1126 }
1127
1128 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1130 self.catalog_by_extension
1131 .get(&ext.to_lowercase())
1132 .map(|&idx| &self.catalog[idx])
1133 }
1134
1135 pub fn apply_language_config(
1148 &mut self,
1149 languages: &HashMap<String, crate::config::LanguageConfig>,
1150 ) {
1151 self.applied_language_config = languages.clone();
1152 self.apply_language_config_inner(languages);
1153 }
1154
1155 fn apply_language_config_inner(
1160 &mut self,
1161 languages: &HashMap<String, crate::config::LanguageConfig>,
1162 ) {
1163 for (lang_id, lang_cfg) in languages {
1164 let grammar_name = if lang_cfg.grammar.is_empty() {
1165 lang_id.as_str()
1166 } else {
1167 lang_cfg.grammar.as_str()
1168 };
1169
1170 let idx = self
1172 .catalog_by_name
1173 .get(&grammar_name.to_lowercase())
1174 .copied()
1175 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1176 .unwrap_or_else(|| {
1177 let idx = self.catalog.len();
1178 self.catalog.push(GrammarEntry {
1179 display_name: lang_id.clone(),
1180 language_id: lang_id.clone(),
1181 short_name: None,
1182 extensions: Vec::new(),
1183 filenames: Vec::new(),
1184 filename_globs: Vec::new(),
1185 source: GrammarSource::BuiltIn,
1186 engines: GrammarEngines::default(),
1187 });
1188 idx
1189 });
1190
1191 self.catalog_by_name
1196 .entry(lang_id.to_lowercase())
1197 .or_insert(idx);
1198
1199 for ext in &lang_cfg.extensions {
1200 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1201 self.catalog[idx].extensions.push(ext.clone());
1202 }
1203 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1205 }
1206 for filename in &lang_cfg.filenames {
1207 if is_glob_pattern(filename) {
1208 if !self.catalog[idx]
1209 .filename_globs
1210 .iter()
1211 .any(|f| f == filename)
1212 {
1213 self.catalog[idx].filename_globs.push(filename.clone());
1214 }
1215 } else {
1216 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1217 self.catalog[idx].filenames.push(filename.clone());
1218 }
1219 self.catalog_by_filename.insert(filename.clone(), idx);
1220 }
1221 }
1222 }
1223 }
1224
1225 pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1227 &self.syntax_set
1228 }
1229
1230 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1232 Arc::clone(&self.syntax_set)
1233 }
1234
1235 pub fn available_syntaxes(&self) -> Vec<&str> {
1237 self.syntax_set
1238 .syntaxes()
1239 .iter()
1240 .map(|s| s.name.as_str())
1241 .collect()
1242 }
1243
1244 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1251 let mut result: Vec<GrammarInfo> = self
1252 .catalog
1253 .iter()
1254 .map(|entry| GrammarInfo {
1255 name: entry.display_name.clone(),
1256 source: entry.source.clone(),
1257 file_extensions: entry.extensions.clone(),
1258 short_name: entry.short_name.clone(),
1259 })
1260 .collect();
1261 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1262 result
1263 }
1264
1265 pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1267 &self.grammar_sources
1268 }
1269
1270 pub(crate) fn build_grammar_sources_from_syntax_set(
1274 syntax_set: &SyntaxSet,
1275 ) -> HashMap<String, GrammarInfo> {
1276 let mut sources = HashMap::new();
1277 for syntax in syntax_set.syntaxes() {
1278 sources.insert(
1279 syntax.name.clone(),
1280 GrammarInfo {
1281 name: syntax.name.clone(),
1282 source: GrammarSource::BuiltIn,
1283 file_extensions: syntax.file_extensions.clone(),
1284 short_name: None,
1285 },
1286 );
1287 }
1288 sources
1289 }
1290
1291 #[cfg(test)]
1293 pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1294 &self.user_extensions
1295 }
1296
1297 #[cfg(test)]
1299 pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1300 &self.loaded_grammar_paths
1301 }
1302
1303 pub fn with_additional_grammars(
1317 base: &GrammarRegistry,
1318 additional: &[GrammarSpec],
1319 ) -> Option<Self> {
1320 tracing::info!(
1321 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1322 additional.len(),
1323 base.syntax_set.syntaxes().len()
1324 );
1325
1326 let mut builder = (*base.syntax_set).clone().into_builder();
1330
1331 let mut user_extensions = base.user_extensions.clone();
1333
1334 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1336
1337 let mut grammar_sources = base.grammar_sources.clone();
1339
1340 for spec in additional {
1342 tracing::info!(
1343 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1344 spec.language,
1345 spec.path,
1346 spec.extensions
1347 );
1348 match Self::load_grammar_file(&spec.path) {
1349 Ok(syntax) => {
1350 let scope = syntax.scope.to_string();
1351 let syntax_name = syntax.name.clone();
1352 tracing::info!(
1353 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1354 syntax_name,
1355 scope
1356 );
1357 builder.add(syntax);
1358 tracing::info!(
1359 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1360 spec.language,
1361 spec.path,
1362 spec.extensions
1363 );
1364 for ext in &spec.extensions {
1366 user_extensions.insert(ext.clone(), scope.clone());
1367 }
1368 grammar_sources.insert(
1370 syntax_name.clone(),
1371 GrammarInfo {
1372 name: syntax_name,
1373 source: GrammarSource::Plugin {
1374 plugin: spec.language.clone(),
1375 path: spec.path.clone(),
1376 },
1377 file_extensions: spec.extensions.clone(),
1378 short_name: None,
1379 },
1380 );
1381 loaded_grammar_paths.push(spec.clone());
1383 }
1384 Err(e) => {
1385 tracing::warn!(
1386 "Failed to load grammar for '{}' from {:?}: {}",
1387 spec.language,
1388 spec.path,
1389 e
1390 );
1391 }
1392 }
1393 }
1394
1395 let mut reg = Self {
1396 syntax_set: Arc::new(builder.build()),
1397 user_extensions,
1398 filename_scopes: base.filename_scopes.clone(),
1399 loaded_grammar_paths,
1400 grammar_sources,
1401 aliases: base.aliases.clone(),
1402 catalog: Vec::new(),
1403 catalog_by_name: HashMap::new(),
1404 catalog_by_extension: HashMap::new(),
1405 catalog_by_filename: HashMap::new(),
1406 applied_language_config: HashMap::new(),
1407 };
1408 reg.rebuild_catalog();
1409 Some(reg)
1410 }
1411
1412 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1418 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1419
1420 match ext {
1421 "sublime-syntax" => {
1422 let content = std::fs::read_to_string(path)
1423 .map_err(|e| format!("Failed to read file: {}", e))?;
1424 SyntaxDefinition::load_from_str(
1425 &content,
1426 true,
1427 path.file_stem().and_then(|s| s.to_str()),
1428 )
1429 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1430 }
1431 _ => Err(format!(
1432 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1433 ext
1434 )),
1435 }
1436 }
1437}
1438
1439impl Default for GrammarRegistry {
1440 fn default() -> Self {
1441 let defaults = SyntaxSet::load_defaults_newlines();
1443 let mut builder = defaults.into_builder();
1444 Self::add_embedded_grammars(&mut builder);
1445 let syntax_set = builder.build();
1446 let filename_scopes = Self::build_filename_scopes();
1447 let extra_extensions = Self::build_extra_extensions();
1448
1449 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1450 registry.populate_built_in_aliases();
1451 registry.rebuild_catalog();
1452 registry
1453 }
1454}
1455
/// Top-level shape of a package manifest, as far as this module reads it.
///
/// Only the optional `contributes` section is deserialized.
/// NOTE(review): the field names match the VS Code extension `package.json`
/// layout — presumably that is the format being parsed; confirm with the caller.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// Contributions declared by the package; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1463
/// The `contributes` section of a [`PackageManifest`].
///
/// Both lists default to empty when their key is missing from the manifest.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Languages declared by the package.
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammars declared by the package.
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1471
/// One language entry in a manifest's [`Contributes::languages`] list.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Identifier of the language (required in the manifest).
    pub id: String,
    /// Extensions associated with the language; empty when the key is absent.
    /// NOTE(review): whether entries carry a leading dot is not visible here — confirm.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1478
/// One grammar entry in a manifest's [`Contributes::grammars`] list.
///
/// All three fields are required in the manifest.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Name of the language this grammar applies to.
    /// NOTE(review): presumably matches a [`LanguageContribution::id`] — confirm.
    pub language: String,
    /// Scope name of the grammar; read from the manifest key `scopeName`.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Location of the grammar file, kept as the raw manifest string.
    /// NOTE(review): presumably relative to the manifest's directory — confirm.
    pub path: String,
}
1486
1487#[cfg(test)]
1488mod tests {
1489 use super::*;
1490
    /// Even a registry built with `GrammarRegistry::empty()` must list at
    /// least one syntax.
    #[test]
    fn test_empty_registry() {
        let registry = GrammarRegistry::empty();
        assert!(!registry.available_syntaxes().is_empty());
    }
1497
    /// The default registry must list at least one syntax.
    #[test]
    fn test_default_registry() {
        let registry = GrammarRegistry::default();
        assert!(!registry.available_syntaxes().is_empty());
    }
1504
1505 #[test]
1506 fn test_find_syntax_for_common_extensions() {
1507 let registry = GrammarRegistry::default();
1508
1509 let test_cases = [
1511 ("test.py", true),
1512 ("test.rs", true),
1513 ("test.js", true),
1514 ("test.json", true),
1515 ("test.md", true),
1516 ("test.html", true),
1517 ("test.css", true),
1518 ("test.unknown_extension_xyz", false),
1519 ];
1520
1521 for (filename, should_exist) in test_cases {
1522 let path = Path::new(filename);
1523 let result = registry.find_syntax_for_file(path);
1524 assert_eq!(
1525 result.is_some(),
1526 should_exist,
1527 "Expected {:?} for {}",
1528 should_exist,
1529 filename
1530 );
1531 }
1532 }
1533
    /// Two calls to `syntax_set_arc` must return handles to the same allocation.
    #[test]
    fn test_syntax_set_arc() {
        let registry = GrammarRegistry::default();
        let arc1 = registry.syntax_set_arc();
        let arc2 = registry.syntax_set_arc();
        // Pointer equality: both handles must share one allocation.
        assert!(Arc::ptr_eq(&arc1, &arc2));
    }
1542
1543 #[test]
1544 fn test_shell_dotfiles_detection() {
1545 let registry = GrammarRegistry::default();
1546
1547 let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1549
1550 for filename in shell_files {
1551 let path = Path::new(filename);
1552 let result = registry.find_syntax_for_file(path);
1553 assert!(
1554 result.is_some(),
1555 "{} should be detected as a syntax",
1556 filename
1557 );
1558 let syntax = result.unwrap();
1559 assert!(
1561 syntax.name.to_lowercase().contains("bash")
1562 || syntax.name.to_lowercase().contains("shell"),
1563 "{} should be detected as shell/bash, got: {}",
1564 filename,
1565 syntax.name
1566 );
1567 }
1568 }
1569
1570 #[test]
1571 fn test_pkgbuild_detection() {
1572 let registry = GrammarRegistry::default();
1573
1574 for filename in ["PKGBUILD", "APKBUILD"] {
1576 let path = Path::new(filename);
1577 let result = registry.find_syntax_for_file(path);
1578 assert!(
1579 result.is_some(),
1580 "{} should be detected as a syntax",
1581 filename
1582 );
1583 let syntax = result.unwrap();
1584 assert!(
1586 syntax.name.to_lowercase().contains("bash")
1587 || syntax.name.to_lowercase().contains("shell"),
1588 "{} should be detected as shell/bash, got: {}",
1589 filename,
1590 syntax.name
1591 );
1592 }
1593 }
1594
1595 #[test]
1596 fn test_find_syntax_with_glob_filenames() {
1597 let mut registry = GrammarRegistry::default();
1598 let mut languages = std::collections::HashMap::new();
1599 languages.insert(
1600 "shell-configs".to_string(),
1601 crate::config::LanguageConfig {
1602 extensions: vec!["sh".to_string()],
1603 filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1604 grammar: "bash".to_string(),
1605 comment_prefix: Some("#".to_string()),
1606 auto_indent: true,
1607 auto_close: None,
1608 auto_surround: None,
1609 textmate_grammar: None,
1610 show_whitespace_tabs: true,
1611 line_wrap: None,
1612 wrap_column: None,
1613 page_view: None,
1614 page_width: None,
1615 use_tabs: None,
1616 tab_size: None,
1617 formatter: None,
1618 format_on_save: false,
1619 on_save: vec![],
1620 word_characters: None,
1621 },
1622 );
1623 registry.apply_language_config(&languages);
1624
1625 assert!(
1626 registry.find_by_path(Path::new("nftables.conf")).is_some(),
1627 "*.conf should match nftables.conf"
1628 );
1629 assert!(
1630 registry.find_by_path(Path::new("lfrc")).is_some(),
1631 "*rc should match lfrc"
1632 );
1633 let _ = registry.find_by_path(Path::new("randomfile"));
1635 }
1636
1637 #[test]
1638 fn test_find_syntax_with_path_glob_filenames() {
1639 let mut registry = GrammarRegistry::default();
1640 let mut languages = std::collections::HashMap::new();
1641 languages.insert(
1642 "shell-configs".to_string(),
1643 crate::config::LanguageConfig {
1644 extensions: vec!["sh".to_string()],
1645 filenames: vec!["/etc/**/rc.*".to_string()],
1646 grammar: "bash".to_string(),
1647 comment_prefix: Some("#".to_string()),
1648 auto_indent: true,
1649 auto_close: None,
1650 auto_surround: None,
1651 textmate_grammar: None,
1652 show_whitespace_tabs: true,
1653 line_wrap: None,
1654 wrap_column: None,
1655 page_view: None,
1656 page_width: None,
1657 use_tabs: None,
1658 tab_size: None,
1659 formatter: None,
1660 format_on_save: false,
1661 on_save: vec![],
1662 word_characters: None,
1663 },
1664 );
1665 registry.apply_language_config(&languages);
1666
1667 assert!(
1668 registry.find_by_path(Path::new("/etc/rc.conf")).is_some(),
1669 "/etc/**/rc.* should match /etc/rc.conf"
1670 );
1671 assert!(
1672 registry
1673 .find_by_path(Path::new("/etc/init/rc.local"))
1674 .is_some(),
1675 "/etc/**/rc.* should match /etc/init/rc.local"
1676 );
1677 let _ = registry.find_by_path(Path::new("/var/rc.conf"));
1678 }
1679
1680 #[test]
1681 fn test_exact_filename_takes_priority_over_glob() {
1682 let mut registry = GrammarRegistry::default();
1683 let mut languages = std::collections::HashMap::new();
1684
1685 languages.insert(
1687 "custom-lfrc".to_string(),
1688 crate::config::LanguageConfig {
1689 extensions: vec![],
1690 filenames: vec!["lfrc".to_string()],
1691 grammar: "python".to_string(),
1692 comment_prefix: Some("#".to_string()),
1693 auto_indent: true,
1694 auto_close: None,
1695 auto_surround: None,
1696 textmate_grammar: None,
1697 show_whitespace_tabs: true,
1698 line_wrap: None,
1699 wrap_column: None,
1700 page_view: None,
1701 page_width: None,
1702 use_tabs: None,
1703 tab_size: None,
1704 formatter: None,
1705 format_on_save: false,
1706 on_save: vec![],
1707 word_characters: None,
1708 },
1709 );
1710
1711 languages.insert(
1713 "rc-files".to_string(),
1714 crate::config::LanguageConfig {
1715 extensions: vec![],
1716 filenames: vec!["*rc".to_string()],
1717 grammar: "bash".to_string(),
1718 comment_prefix: Some("#".to_string()),
1719 auto_indent: true,
1720 auto_close: None,
1721 auto_surround: None,
1722 textmate_grammar: None,
1723 show_whitespace_tabs: true,
1724 line_wrap: None,
1725 wrap_column: None,
1726 page_view: None,
1727 page_width: None,
1728 use_tabs: None,
1729 tab_size: None,
1730 formatter: None,
1731 format_on_save: false,
1732 on_save: vec![],
1733 word_characters: None,
1734 },
1735 );
1736
1737 registry.apply_language_config(&languages);
1738
1739 let entry = registry.find_by_path(Path::new("lfrc")).unwrap();
1741 assert!(
1742 entry.display_name.to_lowercase().contains("python"),
1743 "exact match should win over glob, got: {}",
1744 entry.display_name
1745 );
1746 }
1747
1748 #[test]
1749 fn test_built_in_aliases_resolve() {
1750 let registry = GrammarRegistry::default();
1751
1752 let syntax = registry.find_syntax_by_name("bash");
1754 assert!(syntax.is_some(), "alias 'bash' should resolve");
1755 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1756
1757 let syntax = registry.find_syntax_by_name("cpp");
1759 assert!(syntax.is_some(), "alias 'cpp' should resolve");
1760 assert_eq!(syntax.unwrap().name, "C++");
1761
1762 let syntax = registry.find_syntax_by_name("csharp");
1764 assert!(syntax.is_some(), "alias 'csharp' should resolve");
1765 assert_eq!(syntax.unwrap().name, "C#");
1766
1767 let syntax = registry.find_syntax_by_name("sh");
1769 assert!(syntax.is_some(), "alias 'sh' should resolve");
1770 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1771
1772 let syntax = registry.find_syntax_by_name("proto");
1774 assert!(syntax.is_some(), "alias 'proto' should resolve");
1775 assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1776 }
1777
1778 #[test]
1779 fn test_alias_case_insensitive_input() {
1780 let registry = GrammarRegistry::default();
1781
1782 let syntax = registry.find_syntax_by_name("BASH");
1784 assert!(
1785 syntax.is_some(),
1786 "alias 'BASH' should resolve case-insensitively"
1787 );
1788 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1789
1790 let syntax = registry.find_syntax_by_name("Cpp");
1791 assert!(
1792 syntax.is_some(),
1793 "alias 'Cpp' should resolve case-insensitively"
1794 );
1795 assert_eq!(syntax.unwrap().name, "C++");
1796 }
1797
1798 #[test]
1799 fn test_full_name_still_works() {
1800 let registry = GrammarRegistry::default();
1801
1802 let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1804 assert!(syntax.is_some(), "full name should still resolve");
1805 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1806
1807 let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1809 assert!(
1810 syntax.is_some(),
1811 "case-insensitive full name should resolve"
1812 );
1813 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1814 }
1815
1816 #[test]
1817 fn test_alias_does_not_shadow_full_names() {
1818 let registry = GrammarRegistry::default();
1819
1820 let syntax = registry.find_syntax_by_name("rust");
1822 assert!(syntax.is_some());
1823 assert_eq!(syntax.unwrap().name, "Rust");
1824
1825 let syntax = registry.find_syntax_by_name("go");
1827 assert!(syntax.is_some());
1828 assert_eq!(syntax.unwrap().name, "Go");
1829 }
1830
    /// An alias can be registered once, cannot be re-pointed at a different
    /// grammar, and may be registered again with the same target.
    #[test]
    fn test_register_alias_rejects_collision() {
        let mut registry = GrammarRegistry::default();

        // First registration of a fresh alias is accepted.
        assert!(registry.register_alias("myalias", "Rust"));
        // Pointing the same alias at a different grammar is refused.
        assert!(!registry.register_alias("myalias", "Go"));

        // Repeating the identical alias → target pair is accepted.
        assert!(registry.register_alias("myalias", "Rust"));
    }
1842
    /// Registering an alias for a grammar that does not exist must fail.
    #[test]
    fn test_register_alias_rejects_nonexistent_target() {
        let mut registry = GrammarRegistry::default();
        assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
    }
1848
    /// An alias that differs from a grammar name only by case (`rust` for
    /// `Rust`) is not registered, yet the name keeps resolving.
    #[test]
    fn test_register_alias_skips_existing_grammar_name() {
        let mut registry = GrammarRegistry::default();

        // Registration is refused...
        assert!(!registry.register_alias("rust", "Rust"));
        // ...but the lookup still succeeds.
        assert!(registry.find_syntax_by_name("rust").is_some());
    }
1858
1859 #[test]
1860 fn test_available_grammar_info_includes_short_names() {
1861 let registry = GrammarRegistry::default();
1862 let infos = registry.available_grammar_info();
1863
1864 let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1865 assert!(bash_info.is_some(), "bash grammar should be in the list");
1866 let bash_info = bash_info.unwrap();
1867 assert!(
1868 bash_info.short_name.is_some(),
1869 "bash grammar should have a short_name"
1870 );
1871 assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1873 }
1874
1875 #[test]
1876 fn test_catalog_contains_each_language_once() {
1877 let registry = GrammarRegistry::default();
1878 let catalog = registry.catalog();
1879
1880 let mut seen = std::collections::HashSet::new();
1882 for entry in catalog {
1883 let key = entry.display_name.to_lowercase();
1884 assert!(
1885 seen.insert(key.clone()),
1886 "duplicate catalog entry for display_name={:?}",
1887 entry.display_name
1888 );
1889 }
1890
1891 let ts = registry
1894 .find_by_name("TypeScript")
1895 .expect("TypeScript must be in the catalog");
1896 assert!(ts.engines.syntect.is_none());
1897 assert_eq!(
1898 ts.engines.tree_sitter,
1899 Some(fresh_languages::Language::TypeScript)
1900 );
1901 assert_eq!(ts.language_id, "typescript");
1902 assert!(ts.extensions.iter().any(|e| e == "ts"));
1903
1904 for name in ["Rust", "Python", "JavaScript"] {
1907 let entry = registry
1908 .find_by_name(name)
1909 .unwrap_or_else(|| panic!("{} must be in the catalog", name));
1910 assert!(
1911 entry.engines.syntect.is_some(),
1912 "{} should have a syntect index",
1913 name
1914 );
1915 assert!(
1916 entry.engines.tree_sitter.is_some(),
1917 "{} should also have a tree-sitter language",
1918 name
1919 );
1920 let by_id = registry
1923 .find_by_name(&entry.language_id)
1924 .expect("language_id should resolve");
1925 assert_eq!(by_id.display_name, entry.display_name);
1926 }
1927 }
1928
1929 #[test]
1930 fn test_catalog_find_by_path_and_extension() {
1931 let registry = GrammarRegistry::default();
1932 let ts = registry
1933 .find_by_path(Path::new("foo.ts"))
1934 .expect("foo.ts should resolve");
1935 assert_eq!(ts.display_name, "TypeScript");
1936 let rs = registry.find_by_extension("rs").expect("rs should resolve");
1937 assert_eq!(rs.display_name, "Rust");
1938 }
1939
1940 fn lang_cfg(
1942 grammar: &str,
1943 extensions: &[&str],
1944 filenames: &[&str],
1945 ) -> crate::config::LanguageConfig {
1946 crate::config::LanguageConfig {
1947 extensions: extensions.iter().map(|s| s.to_string()).collect(),
1948 filenames: filenames.iter().map(|s| s.to_string()).collect(),
1949 grammar: grammar.to_string(),
1950 comment_prefix: None,
1951 auto_indent: true,
1952 auto_close: None,
1953 auto_surround: None,
1954 textmate_grammar: None,
1955 show_whitespace_tabs: true,
1956 line_wrap: None,
1957 wrap_column: None,
1958 page_view: None,
1959 page_width: None,
1960 use_tabs: None,
1961 tab_size: None,
1962 formatter: None,
1963 format_on_save: false,
1964 on_save: vec![],
1965 word_characters: None,
1966 }
1967 }
1968
1969 #[test]
1973 fn test_user_alias_resolves_via_find_by_name() {
1974 let mut registry = GrammarRegistry::default();
1975 let mut languages = std::collections::HashMap::new();
1976 languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
1977 registry.apply_language_config(&languages);
1978
1979 let entry = registry
1980 .find_by_name("mylang")
1981 .expect("user-declared alias 'mylang' must resolve");
1982 assert_eq!(entry.display_name, "Rust");
1983 }
1984
1985 #[test]
1989 fn test_register_alias_preserves_applied_language_config() {
1990 let mut registry = GrammarRegistry::default();
1991 let mut languages = std::collections::HashMap::new();
1992 languages.insert(
1993 "shell-configs".to_string(),
1994 lang_cfg("bash", &["myconf"], &["*.myconf"]),
1995 );
1996 registry.apply_language_config(&languages);
1997
1998 assert!(registry.find_by_extension("myconf").is_some());
2000 assert!(
2001 registry.find_by_path(Path::new("foo.myconf")).is_some(),
2002 "glob should match before register_alias"
2003 );
2004
2005 registry.register_alias("mycustom", "Rust");
2007
2008 assert!(
2009 registry.find_by_extension("myconf").is_some(),
2010 "config extension must survive register_alias"
2011 );
2012 assert!(
2013 registry.find_by_path(Path::new("foo.myconf")).is_some(),
2014 "glob must survive register_alias"
2015 );
2016 }
2017
2018 #[test]
2022 fn test_from_syntax_name_preserves_canonical_display_name() {
2023 use crate::primitives::detected_language::DetectedLanguage;
2024 let registry = GrammarRegistry::default();
2025 let languages = std::collections::HashMap::new();
2026
2027 let detected = DetectedLanguage::from_syntax_name("BASH", ®istry, &languages)
2028 .expect("BASH should resolve via alias");
2029 assert_eq!(
2030 detected.display_name, "Bourne Again Shell (bash)",
2031 "display_name must be canonical, not user-typed"
2032 );
2033 }
2034
2035 #[test]
2039 fn test_config_only_language_appears_in_catalog() {
2040 let mut registry = GrammarRegistry::default();
2041 let mut languages = std::collections::HashMap::new();
2042 languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2044 registry.apply_language_config(&languages);
2045
2046 let entry = registry
2047 .find_by_name("fish")
2048 .expect("fish should be in the catalog after apply_language_config");
2049 assert!(entry.engines.syntect.is_none());
2050 assert!(entry.engines.tree_sitter.is_none());
2051 assert_eq!(entry.language_id, "fish");
2052 assert!(entry.extensions.iter().any(|e| e == "fish"));
2053 }
2054
2055 #[test]
2060 fn test_config_extension_overrides_builtin() {
2061 let mut registry = GrammarRegistry::default();
2062 assert_eq!(
2064 registry.find_by_extension("js").unwrap().display_name,
2065 "JavaScript"
2066 );
2067
2068 let mut languages = std::collections::HashMap::new();
2069 languages.insert(
2070 "ts-overlay".to_string(),
2071 lang_cfg("TypeScript", &["js"], &[]),
2072 );
2073 registry.apply_language_config(&languages);
2074
2075 assert_eq!(
2076 registry.find_by_extension("js").unwrap().display_name,
2077 "TypeScript",
2078 "user-config extension must win over built-in"
2079 );
2080 }
2081
2082 #[test]
2089 fn test_bare_filename_resolves_via_find_by_path() {
2090 let registry = GrammarRegistry::default();
2091 for (filename, expected_substr) in [
2092 ("Gemfile", "ruby"),
2093 ("Rakefile", "ruby"),
2094 ("Vagrantfile", "ruby"),
2095 ("Makefile", "makefile"),
2096 ("GNUmakefile", "makefile"),
2097 ] {
2098 let entry = registry
2099 .find_by_path(Path::new(filename))
2100 .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2101 assert!(
2102 entry.display_name.to_lowercase().contains(expected_substr),
2103 "{} should resolve to {} grammar, got {}",
2104 filename,
2105 expected_substr,
2106 entry.display_name
2107 );
2108 }
2109 }
2110
2111 #[test]
2116 fn test_jsx_resolves_to_javascript() {
2117 let registry = GrammarRegistry::default();
2118 let entry = registry
2119 .find_by_path(Path::new("foo.jsx"))
2120 .expect("foo.jsx must resolve");
2121 assert_eq!(entry.display_name, "JavaScript");
2122 }
2123
2124 #[test]
2129 fn test_rebuild_catalog_replays_language_config() {
2130 let mut registry = GrammarRegistry::default();
2131 let mut languages = std::collections::HashMap::new();
2132 languages.insert(
2133 "myshell".to_string(),
2134 lang_cfg("bash", &["myext"], &["*.myglob"]),
2135 );
2136 registry.apply_language_config(&languages);
2137 assert!(registry.find_by_extension("myext").is_some());
2138 assert!(registry.find_by_path(Path::new("foo.myglob")).is_some());
2139
2140 registry.rebuild_catalog();
2143 assert!(
2144 registry.find_by_extension("myext").is_some(),
2145 "rebuild_catalog must replay applied user config"
2146 );
2147 assert!(
2148 registry.find_by_path(Path::new("foo.myglob")).is_some(),
2149 "rebuild_catalog must replay user globs"
2150 );
2151 }
2152
2153 #[test]
2156 fn test_apply_language_config_idempotent() {
2157 let mut registry = GrammarRegistry::default();
2158 let mut languages = std::collections::HashMap::new();
2159 languages.insert(
2160 "shell-cfg".to_string(),
2161 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2162 );
2163
2164 registry.apply_language_config(&languages);
2165 let first_extensions = registry
2166 .find_by_name("bash")
2167 .unwrap()
2168 .extensions
2169 .iter()
2170 .filter(|e| e == &"myconf")
2171 .count();
2172 let first_globs = registry
2173 .find_by_name("bash")
2174 .unwrap()
2175 .filename_globs
2176 .iter()
2177 .filter(|g| g == &"*.myconf")
2178 .count();
2179 assert_eq!(first_extensions, 1);
2180 assert_eq!(first_globs, 1);
2181
2182 registry.apply_language_config(&languages);
2184 let second_extensions = registry
2185 .find_by_name("bash")
2186 .unwrap()
2187 .extensions
2188 .iter()
2189 .filter(|e| e == &"myconf")
2190 .count();
2191 let second_globs = registry
2192 .find_by_name("bash")
2193 .unwrap()
2194 .filename_globs
2195 .iter()
2196 .filter(|g| g == &"*.myconf")
2197 .count();
2198 assert_eq!(second_extensions, 1, "extensions must not duplicate");
2199 assert_eq!(second_globs, 1, "globs must not duplicate");
2200 }
2201
2202 #[test]
2206 fn test_tree_sitter_bridge() {
2207 assert_eq!(
2208 tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2209 Some(fresh_languages::Language::Bash)
2210 );
2211 assert_eq!(
2212 tree_sitter_for_syntect_name("Rust"),
2213 Some(fresh_languages::Language::Rust)
2214 );
2215 assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2217 assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2219 }
2220}