1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file to load on top of an existing registry.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language the grammar is loaded for. Used in log messages and stored as
    /// the `plugin` field of `GrammarSource::Plugin` once the file is loaded.
    pub language: String,
    /// Location of the grammar file.
    pub path: PathBuf,
    /// Extensions that are mapped to the grammar's scope after a successful load.
    pub extensions: Vec<String>,
}
30
/// Origin of a grammar.
///
/// Serialized as an internally tagged enum: the variant is written to a
/// `"type"` field using the kebab-case names given by the `rename` attributes.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Shipped with the application. Also the fallback used by the catalog
    /// when no source was recorded for a grammar.
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Loaded from `path`; displayed together with that path.
    /// NOTE(review): presumably a grammar from the user's config directory — confirm.
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Provided by the language pack called `name`, read from `path`.
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Provided by the bundle called `name`, read from `path`.
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Registered through a `GrammarSpec`; `plugin` holds the spec's language
    /// and `path` the grammar file it was loaded from.
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of one grammar: its name, origin and extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name (the syntax / catalog display name).
    pub name: String,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Short alias, if one is known. Omitted from the serialized form when
    /// `None` and defaulted to `None` when absent on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit pairs of syntect display name and tree-sitter language.
///
/// `tree_sitter_for_syntect_name` consults this table first and only falls
/// back to comparing display names when no pair matches.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// Highlighting backends available for one catalog entry.
///
/// The default value has neither backend set.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index of the grammar in the registry's `SyntaxSet::syntaxes()` slice,
    /// when a syntect grammar exists.
    pub syntect: Option<usize>,
    /// Matching tree-sitter language, when one exists.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One language in the registry's catalog, combining what syntect,
/// tree-sitter and the language config know about it.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Human-readable name (the syntect syntax name or the tree-sitter
    /// display name).
    pub display_name: String,
    /// Identifier: the tree-sitter language id when one matches, otherwise
    /// the lowercased display name (or the config key for config-only entries).
    pub language_id: String,
    /// Shortest known alias for the grammar, if any.
    pub short_name: Option<String>,
    /// Extensions that select this entry.
    pub extensions: Vec<String>,
    /// Exact file names that select this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (file-name or path patterns) that select this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Highlighting backends available for this entry.
    pub engines: GrammarEngines,
}
142
// Grammar definitions (`.sublime-syntax` sources) embedded in the binary at
// compile time via `include_str!`. `GrammarRegistry::add_embedded_grammars`
// parses these strings and adds them to a `SyntaxSetBuilder`.

pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

// Git-related file formats.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

// The grammars below are loaded through the name table in
// `add_embedded_grammars`.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
237
238impl std::fmt::Debug for GrammarRegistry {
243 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244 f.debug_struct("GrammarRegistry")
245 .field("syntax_count", &self.syntax_set.syntaxes().len())
246 .finish()
247 }
248}
249
/// Registry of all known grammars plus the lookup tables used to pick one for
/// a file name, extension, alias or first line.
pub struct GrammarRegistry {
    /// The syntect grammars, shared behind an `Arc`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra extension -> scope mappings merged into the catalog.
    user_extensions: HashMap<String, String>,
    /// Exact file name -> scope mappings merged into the catalog.
    filename_scopes: HashMap<String, String>,
    /// Specs of the additional grammar files that were loaded successfully.
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Origin information keyed by syntax name.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lowercased alias -> exact syntax name.
    aliases: HashMap<String, String>,
    /// All catalog entries; the index maps below store positions in this vector.
    catalog: Vec<GrammarEntry>,
    /// Lowercased display name, language id or short name -> catalog index.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased extension -> catalog index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact (case-sensitive) file name -> catalog index.
    catalog_by_filename: HashMap<String, usize>,
    /// Last language config passed to `apply_language_config`; replayed after
    /// every catalog rebuild.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
}
282
283impl GrammarRegistry {
    /// Creates a registry from an already built syntax set and the given
    /// extension -> scope and file name -> scope maps.
    ///
    /// Delegates to `new_with_loaded_paths` with no loaded grammar paths and
    /// no recorded grammar sources.
    pub(crate) fn new(
        syntax_set: SyntaxSet,
        user_extensions: HashMap<String, String>,
        filename_scopes: HashMap<String, String>,
    ) -> Self {
        Self::new_with_loaded_paths(
            syntax_set,
            user_extensions,
            filename_scopes,
            Vec::new(),
            HashMap::new(),
        )
    }
301
302 pub(crate) fn new_with_loaded_paths(
307 syntax_set: SyntaxSet,
308 user_extensions: HashMap<String, String>,
309 filename_scopes: HashMap<String, String>,
310 loaded_grammar_paths: Vec<GrammarSpec>,
311 grammar_sources: HashMap<String, GrammarInfo>,
312 ) -> Self {
313 let mut reg = Self {
314 syntax_set: Arc::new(syntax_set),
315 user_extensions,
316 filename_scopes,
317 loaded_grammar_paths,
318 grammar_sources,
319 aliases: HashMap::new(),
320 catalog: Vec::new(),
321 catalog_by_name: HashMap::new(),
322 catalog_by_extension: HashMap::new(),
323 catalog_by_filename: HashMap::new(),
324 applied_language_config: HashMap::new(),
325 };
326 reg.rebuild_catalog();
327 reg
328 }
329
330 pub fn empty() -> Arc<Self> {
332 let mut builder = SyntaxSetBuilder::new();
333 builder.add_plain_text_syntax();
334 let mut reg = Self {
335 syntax_set: Arc::new(builder.build()),
336 user_extensions: HashMap::new(),
337 filename_scopes: HashMap::new(),
338 loaded_grammar_paths: Vec::new(),
339 grammar_sources: HashMap::new(),
340 aliases: HashMap::new(),
341 catalog: Vec::new(),
342 catalog_by_name: HashMap::new(),
343 catalog_by_extension: HashMap::new(),
344 catalog_by_filename: HashMap::new(),
345 applied_language_config: HashMap::new(),
346 };
347 reg.rebuild_catalog();
348 Arc::new(reg)
349 }
350
351 pub fn defaults_only() -> Arc<Self> {
358 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
362 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
363 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
364 ))
365 .expect("Failed to load pre-compiled syntax packdump");
366 tracing::info!(
367 "defaults_only: loaded ({} syntaxes)",
368 syntax_set.syntaxes().len()
369 );
370 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
371 let filename_scopes = Self::build_filename_scopes();
372 let extra_extensions = Self::build_extra_extensions();
373 let mut registry = Self {
374 syntax_set: Arc::new(syntax_set),
375 user_extensions: extra_extensions,
376 filename_scopes,
377 loaded_grammar_paths: Vec::new(),
378 grammar_sources,
379 aliases: HashMap::new(),
380 catalog: Vec::new(),
381 catalog_by_name: HashMap::new(),
382 catalog_by_extension: HashMap::new(),
383 catalog_by_filename: HashMap::new(),
384 applied_language_config: HashMap::new(),
385 };
386 registry.populate_built_in_aliases();
387 registry.rebuild_catalog();
388 Arc::new(registry)
389 }
390
391 pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
396 let mut map = HashMap::new();
397
398 let js_scope = "source.js".to_string();
400 map.insert("cjs".to_string(), js_scope.clone());
401 map.insert("mjs".to_string(), js_scope);
402
403 map
407 }
408
409 pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
411 let mut map = HashMap::new();
412
413 let shell_scope = "source.shell.bash".to_string();
415 for filename in [
416 ".zshrc",
417 ".zprofile",
418 ".zshenv",
419 ".zlogin",
420 ".zlogout",
421 ".bash_aliases",
422 "PKGBUILD",
425 "APKBUILD",
426 ] {
427 map.insert(filename.to_string(), shell_scope.clone());
428 }
429
430 let git_rebase_scope = "source.git-rebase-todo".to_string();
432 map.insert("git-rebase-todo".to_string(), git_rebase_scope);
433
434 let git_commit_scope = "source.git-commit".to_string();
436 for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
437 map.insert(filename.to_string(), git_commit_scope.clone());
438 }
439
440 let gitignore_scope = "source.gitignore".to_string();
442 for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
443 map.insert(filename.to_string(), gitignore_scope.clone());
444 }
445
446 let gitconfig_scope = "source.gitconfig".to_string();
448 for filename in [".gitconfig", ".gitmodules"] {
449 map.insert(filename.to_string(), gitconfig_scope.clone());
450 }
451
452 let gitattributes_scope = "source.gitattributes".to_string();
454 map.insert(".gitattributes".to_string(), gitattributes_scope);
455
456 let groovy_scope = "source.groovy".to_string();
458 map.insert("Jenkinsfile".to_string(), groovy_scope);
459
460 let ruby_scope = "source.ruby".to_string();
463 map.insert("Brewfile".to_string(), ruby_scope);
464
465 let dockerfile_scope = "source.dockerfile".to_string();
467 map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
468 map.insert("Containerfile".to_string(), dockerfile_scope.clone());
469 map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
471 map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
472 map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
473 map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
474
475 let cmake_scope = "source.cmake".to_string();
477 map.insert("CMakeLists.txt".to_string(), cmake_scope);
478
479 let starlark_scope = "source.starlark".to_string();
481 map.insert("BUILD".to_string(), starlark_scope.clone());
482 map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
483 map.insert("WORKSPACE".to_string(), starlark_scope.clone());
484 map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
485 map.insert("Tiltfile".to_string(), starlark_scope);
486
487 let justfile_scope = "source.justfile".to_string();
489 map.insert("justfile".to_string(), justfile_scope.clone());
490 map.insert("Justfile".to_string(), justfile_scope.clone());
491 map.insert(".justfile".to_string(), justfile_scope);
492
493 let ini_scope = "source.ini".to_string();
495 map.insert(".editorconfig".to_string(), ini_scope);
496
497 let earthfile_scope = "source.earthfile".to_string();
499 map.insert("Earthfile".to_string(), earthfile_scope);
500
501 let hyprlang_scope = "source.hyprlang".to_string();
503 map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
504 map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
505 map.insert("hyprlock.conf".to_string(), hyprlang_scope);
506
507 let gomod_scope = "source.gomod".to_string();
509 map.insert("go.mod".to_string(), gomod_scope.clone());
510 map.insert("go.sum".to_string(), gomod_scope);
511
512 map
513 }
514
515 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
517 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
519 Ok(syntax) => {
520 builder.add(syntax);
521 tracing::debug!("Loaded embedded TOML grammar");
522 }
523 Err(e) => {
524 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
525 }
526 }
527
528 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
530 Ok(syntax) => {
531 builder.add(syntax);
532 tracing::debug!("Loaded embedded Odin grammar");
533 }
534 Err(e) => {
535 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
536 }
537 }
538
539 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
541 Ok(syntax) => {
542 builder.add(syntax);
543 tracing::debug!("Loaded embedded Zig grammar");
544 }
545 Err(e) => {
546 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
547 }
548 }
549
550 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
552 Ok(syntax) => {
553 builder.add(syntax);
554 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
555 }
556 Err(e) => {
557 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
558 }
559 }
560
561 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
563 {
564 Ok(syntax) => {
565 builder.add(syntax);
566 tracing::debug!("Loaded embedded Git Commit Message grammar");
567 }
568 Err(e) => {
569 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
570 }
571 }
572
573 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
575 Ok(syntax) => {
576 builder.add(syntax);
577 tracing::debug!("Loaded embedded Gitignore grammar");
578 }
579 Err(e) => {
580 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
581 }
582 }
583
584 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
586 Ok(syntax) => {
587 builder.add(syntax);
588 tracing::debug!("Loaded embedded Git Config grammar");
589 }
590 Err(e) => {
591 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
592 }
593 }
594
595 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
597 Ok(syntax) => {
598 builder.add(syntax);
599 tracing::debug!("Loaded embedded Git Attributes grammar");
600 }
601 Err(e) => {
602 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
603 }
604 }
605
606 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
608 Ok(syntax) => {
609 builder.add(syntax);
610 tracing::debug!("Loaded embedded Typst grammar");
611 }
612 Err(e) => {
613 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
614 }
615 }
616
617 let additional_grammars: &[(&str, &str)] = &[
619 (DOCKERFILE_GRAMMAR, "Dockerfile"),
620 (INI_GRAMMAR, "INI"),
621 (CMAKE_GRAMMAR, "CMake"),
622 (SCSS_GRAMMAR, "SCSS"),
623 (LESS_GRAMMAR, "LESS"),
624 (POWERSHELL_GRAMMAR, "PowerShell"),
625 (KOTLIN_GRAMMAR, "Kotlin"),
626 (SWIFT_GRAMMAR, "Swift"),
627 (DART_GRAMMAR, "Dart"),
628 (ELIXIR_GRAMMAR, "Elixir"),
629 (FSHARP_GRAMMAR, "FSharp"),
630 (NIX_GRAMMAR, "Nix"),
631 (HCL_GRAMMAR, "HCL"),
632 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
633 (GRAPHQL_GRAMMAR, "GraphQL"),
634 (JULIA_GRAMMAR, "Julia"),
635 (NIM_GRAMMAR, "Nim"),
636 (GLEAM_GRAMMAR, "Gleam"),
637 (VLANG_GRAMMAR, "V"),
638 (SOLIDITY_GRAMMAR, "Solidity"),
639 (KDL_GRAMMAR, "KDL"),
640 (NUSHELL_GRAMMAR, "Nushell"),
641 (STARLARK_GRAMMAR, "Starlark"),
642 (JUSTFILE_GRAMMAR, "Justfile"),
643 (EARTHFILE_GRAMMAR, "Earthfile"),
644 (GOMOD_GRAMMAR, "Go Module"),
645 (VUE_GRAMMAR, "Vue"),
646 (SVELTE_GRAMMAR, "Svelte"),
647 (ASTRO_GRAMMAR, "Astro"),
648 (HYPRLANG_GRAMMAR, "Hyprlang"),
649 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
650 (RACKET_GRAMMAR, "Racket"),
651 ];
652
653 for (grammar_str, name) in additional_grammars {
654 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
655 Ok(syntax) => {
656 builder.add(syntax);
657 tracing::debug!("Loaded embedded {} grammar", name);
658 }
659 Err(e) => {
660 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
661 }
662 }
663 }
664 }
665
666 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
672 let entry = self.find_by_path(path, None)?;
673 entry
674 .engines
675 .syntect
676 .map(|i| &self.syntax_set.syntaxes()[i])
677 }
678
679 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
687 if let Some(entry) = self.find_by_name(name) {
688 if let Some(idx) = entry.engines.syntect {
689 return Some(&self.syntax_set.syntaxes()[idx]);
690 }
691 }
692 self.syntax_set.find_syntax_by_name(name)
696 }
697
698 fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
707 vec![
708 ("bash", "Bourne Again Shell (bash)"),
710 ("shell", "Bourne Again Shell (bash)"),
711 ("sh", "Bourne Again Shell (bash)"),
712 ("c++", "C++"),
713 ("cpp", "C++"),
714 ("csharp", "C#"),
715 ("objc", "Objective-C"),
716 ("objcpp", "Objective-C++"),
717 ("regex", "Regular Expressions (Python)"),
718 ("regexp", "Regular Expressions (Python)"),
719 ("proto", "Protocol Buffers"),
721 ("protobuf", "Protocol Buffers"),
722 ("gomod", "Go Module"),
723 ("git-rebase", "Git Rebase Todo"),
724 ("git-commit", "Git Commit Message"),
725 ("git-config", "Git Config"),
726 ("git-attributes", "Git Attributes"),
727 ("gitignore", "Gitignore"),
728 ("fsharp", "FSharp"),
729 ("f#", "FSharp"),
730 ("terraform", "HCL"),
731 ("tf", "HCL"),
732 ("ts", "TypeScript"),
733 ("js", "JavaScript"),
734 ("py", "Python"),
735 ("rb", "Ruby"),
736 ("rs", "Rust"),
737 ("md", "Markdown"),
738 ("yml", "YAML"),
739 ("dockerfile", "Dockerfile"),
740 ]
741 }
742
743 pub(crate) fn populate_built_in_aliases(&mut self) {
750 for (short, full) in Self::built_in_aliases() {
751 self.register_alias_inner(short, full, true);
752 }
753 self.rebuild_catalog();
754 }
755
756 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
766 if !self.register_alias_inner(short_name, full_name, false) {
767 return false;
768 }
769 let short_lower = short_name.to_lowercase();
770 let full_lower = full_name.to_lowercase();
771 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
772 self.catalog_by_name
773 .entry(short_lower.clone())
774 .or_insert(idx);
775 let entry = &mut self.catalog[idx];
776 let replace = match &entry.short_name {
777 None => true,
778 Some(existing) => short_name.len() < existing.len(),
779 };
780 if replace {
781 entry.short_name = Some(short_lower);
782 }
783 }
784 true
785 }
786
787 fn register_alias_inner(
788 &mut self,
789 short_name: &str,
790 full_name: &str,
791 is_built_in: bool,
792 ) -> bool {
793 let short_lower = short_name.to_lowercase();
794
795 let target_exists = self
797 .syntax_set
798 .syntaxes()
799 .iter()
800 .any(|s| s.name.eq_ignore_ascii_case(full_name));
801 if !target_exists {
802 if tree_sitter_for_syntect_name(full_name).is_some() {
806 return false;
807 }
808 if is_built_in {
809 tracing::warn!(
812 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
813 short_name, full_name
814 );
815 } else {
816 tracing::warn!(
817 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
818 short_name,
819 full_name
820 );
821 }
822 return false;
823 }
824
825 let collides_with_full_name = self
827 .syntax_set
828 .syntaxes()
829 .iter()
830 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
831 if collides_with_full_name {
832 tracing::debug!(
836 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
837 short_name
838 );
839 return false;
840 }
841
842 if let Some(existing_target) = self.aliases.get(&short_lower) {
844 if existing_target.eq_ignore_ascii_case(full_name) {
845 return true;
847 }
848 let msg = format!(
849 "Alias '{}' already maps to '{}', cannot remap to '{}'",
850 short_name, existing_target, full_name
851 );
852 if is_built_in {
853 panic!("[grammar-alias] Built-in alias collision: {}", msg);
854 } else {
855 tracing::warn!("[grammar-alias] {}", msg);
856 return false;
857 }
858 }
859
860 let exact_name = self
862 .syntax_set
863 .syntaxes()
864 .iter()
865 .find(|s| s.name.eq_ignore_ascii_case(full_name))
866 .map(|s| s.name.clone())
867 .unwrap();
868
869 self.aliases.insert(short_lower, exact_name);
870 true
871 }
872
    /// Rebuilds `catalog` and its three lookup indices from the syntax set,
    /// the alias tables, the file-name / extension scope maps and the
    /// tree-sitter language list.
    ///
    /// The previous catalog and indices are replaced wholesale, so a language
    /// config applied earlier is replayed at the end.
    pub(crate) fn rebuild_catalog(&mut self) {
        // Lowercased full grammar name -> preferred short alias.
        let mut short_by_full: HashMap<String, String> = HashMap::new();
        // Keeps the shortest alias per grammar; on equal length the alias
        // recorded first stays.
        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
            let key = full.to_lowercase();
            let keep = match map.get(&key) {
                None => true,
                Some(existing) => short.len() < existing.len(),
            };
            if keep {
                map.insert(key, short.to_string());
            }
        };
        // The built-in table is consulted directly (not via `self.aliases`),
        // and before the registered aliases.
        for (short, full) in Self::built_in_aliases() {
            record(&mut short_by_full, short, full);
        }
        for (short, full) in &self.aliases {
            record(&mut short_by_full, short, full);
        }

        // Language id: the tree-sitter id when a language matches the display
        // name, otherwise the lowercased display name.
        let derive_language_id =
            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
                let ts = tree_sitter_for_syntect_name(display_name);
                let id = ts
                    .map(|l| l.id().to_string())
                    .unwrap_or_else(|| display_name.to_lowercase());
                (id, ts)
            };

        let mut catalog: Vec<GrammarEntry> = Vec::new();
        // Scope string -> catalog index, used to attach file names and extra
        // extensions below.
        let mut scope_to_index: HashMap<String, usize> = HashMap::new();

        // Pass 1: one entry per syntect syntax. `idx` is the position in the
        // syntax set; `entry_index` is the position in the catalog (the two
        // differ because "Plain Text" is skipped).
        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
            if syntax.name == "Plain Text" {
                continue;
            }
            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
            // Grammars without a recorded source are treated as built-in.
            let source = self
                .grammar_sources
                .get(&syntax.name)
                .map(|info| info.source.clone())
                .unwrap_or(GrammarSource::BuiltIn);
            let entry_index = catalog.len();
            scope_to_index.insert(syntax.scope.to_string(), entry_index);

            // Start from the syntect extensions and append any tree-sitter
            // extensions that are not already listed.
            let mut extensions = syntax.file_extensions.clone();
            if let Some(lang) = tree_sitter {
                for ext in lang.extensions() {
                    let ext = ext.to_string();
                    if !extensions.iter().any(|e| e == &ext) {
                        extensions.push(ext);
                    }
                }
            }

            catalog.push(GrammarEntry {
                display_name: syntax.name.clone(),
                language_id,
                short_name,
                extensions,
                filenames: Vec::new(),
                filename_globs: Vec::new(),
                source,
                engines: GrammarEngines {
                    syntect: Some(idx),
                    tree_sitter,
                },
            });
        }

        // Attach exact file names to the entry owning the scope; mappings
        // whose scope has no entry are ignored.
        for (filename, scope) in &self.filename_scopes {
            if let Some(&idx) = scope_to_index.get(scope) {
                if !catalog[idx].filenames.iter().any(|f| f == filename) {
                    catalog[idx].filenames.push(filename.clone());
                }
            }
        }

        // Same for the extra extension -> scope mappings.
        for (ext, scope) in &self.user_extensions {
            if let Some(&idx) = scope_to_index.get(scope) {
                if !catalog[idx].extensions.iter().any(|e| e == ext) {
                    catalog[idx].extensions.push(ext.clone());
                }
            }
        }

        // Pass 2: add a tree-sitter-only entry for every language that no
        // syntect entry is already paired with.
        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
            std::collections::HashSet::new();
        for entry in &catalog {
            if let Some(lang) = entry.engines.tree_sitter {
                ts_covered.insert(lang);
            }
        }
        for lang in fresh_languages::Language::all() {
            if ts_covered.contains(lang) {
                continue;
            }
            let display_name = lang.display_name().to_string();
            let language_id = lang.id().to_string();
            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
            catalog.push(GrammarEntry {
                display_name,
                language_id,
                short_name,
                extensions,
                filenames: Vec::new(),
                filename_globs: Vec::new(),
                source: GrammarSource::BuiltIn,
                engines: GrammarEngines {
                    syntect: None,
                    tree_sitter: Some(*lang),
                },
            });
        }

        // Build the lookup indices. Name keys use `insert`, so on a collision
        // the later catalog entry wins; extension and file-name keys use
        // `or_insert`, so the earlier entry wins.
        let mut by_name: HashMap<String, usize> = HashMap::new();
        let mut by_extension: HashMap<String, usize> = HashMap::new();
        let mut by_filename: HashMap<String, usize> = HashMap::new();
        for (idx, entry) in catalog.iter().enumerate() {
            by_name.insert(entry.display_name.to_lowercase(), idx);
            by_name.insert(entry.language_id.to_lowercase(), idx);
            if let Some(short) = &entry.short_name {
                by_name.insert(short.to_lowercase(), idx);
            }
            for ext in &entry.extensions {
                by_extension.entry(ext.to_lowercase()).or_insert(idx);
                // The raw extension string is also indexed as an exact file
                // name. NOTE(review): presumably because grammar extension
                // lists can contain whole file names — confirm.
                by_filename.entry(ext.clone()).or_insert(idx);
            }
            for filename in &entry.filenames {
                by_filename.entry(filename.clone()).or_insert(idx);
            }
        }

        self.catalog = catalog;
        self.catalog_by_name = by_name;
        self.catalog_by_extension = by_extension;
        self.catalog_by_filename = by_filename;

        // Replay the language config on the fresh catalog. The map is moved
        // out for the duration of the call, so it can be passed by reference
        // while `self` is mutably borrowed, and is put back afterwards.
        if !self.applied_language_config.is_empty() {
            let cfg = std::mem::take(&mut self.applied_language_config);
            self.apply_language_config_inner(&cfg);
            self.applied_language_config = cfg;
        }
    }
1068
    /// Returns all catalog entries in catalog order.
    pub fn catalog(&self) -> &[GrammarEntry] {
        &self.catalog
    }
1073
1074 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1080 self.catalog_by_name
1081 .get(&name.to_lowercase())
1082 .map(|&idx| &self.catalog[idx])
1083 }
1084
1085 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1106 let filename = path.file_name().and_then(|n| n.to_str());
1107 let path_str = path.to_str().unwrap_or("");
1108
1109 if let Some(name) = filename {
1110 if let Some(&idx) = self.catalog_by_filename.get(name) {
1111 return Some(&self.catalog[idx]);
1112 }
1113 }
1114
1115 if let Some(name) = filename {
1117 for entry in &self.catalog {
1118 for pattern in &entry.filename_globs {
1119 let matched = if is_path_pattern(pattern) {
1120 path_glob_matches(pattern, path_str)
1121 } else {
1122 filename_glob_matches(pattern, name)
1123 };
1124 if matched {
1125 return Some(entry);
1126 }
1127 }
1128 }
1129 }
1130
1131 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1132 if let Some(entry) = self.find_by_extension(ext) {
1133 return Some(entry);
1134 }
1135 }
1136
1137 let line = first_line?;
1142 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1143 self.find_by_name(&syntax.name)
1144 }
1145
1146 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1148 self.catalog_by_extension
1149 .get(&ext.to_lowercase())
1150 .map(|&idx| &self.catalog[idx])
1151 }
1152
1153 pub fn apply_language_config(
1166 &mut self,
1167 languages: &HashMap<String, crate::config::LanguageConfig>,
1168 ) {
1169 self.applied_language_config = languages.clone();
1170 self.apply_language_config_inner(languages);
1171 }
1172
1173 fn apply_language_config_inner(
1178 &mut self,
1179 languages: &HashMap<String, crate::config::LanguageConfig>,
1180 ) {
1181 for (lang_id, lang_cfg) in languages {
1182 let grammar_name = if lang_cfg.grammar.is_empty() {
1183 lang_id.as_str()
1184 } else {
1185 lang_cfg.grammar.as_str()
1186 };
1187
1188 let idx = self
1190 .catalog_by_name
1191 .get(&grammar_name.to_lowercase())
1192 .copied()
1193 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1194 .unwrap_or_else(|| {
1195 let idx = self.catalog.len();
1196 self.catalog.push(GrammarEntry {
1197 display_name: lang_id.clone(),
1198 language_id: lang_id.clone(),
1199 short_name: None,
1200 extensions: Vec::new(),
1201 filenames: Vec::new(),
1202 filename_globs: Vec::new(),
1203 source: GrammarSource::BuiltIn,
1204 engines: GrammarEngines::default(),
1205 });
1206 idx
1207 });
1208
1209 self.catalog_by_name
1214 .entry(lang_id.to_lowercase())
1215 .or_insert(idx);
1216
1217 for ext in &lang_cfg.extensions {
1218 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1219 self.catalog[idx].extensions.push(ext.clone());
1220 }
1221 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1223 }
1224 for filename in &lang_cfg.filenames {
1225 if is_glob_pattern(filename) {
1226 if !self.catalog[idx]
1227 .filename_globs
1228 .iter()
1229 .any(|f| f == filename)
1230 {
1231 self.catalog[idx].filename_globs.push(filename.clone());
1232 }
1233 } else {
1234 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1235 self.catalog[idx].filenames.push(filename.clone());
1236 }
1237 self.catalog_by_filename.insert(filename.clone(), idx);
1238 }
1239 }
1240 }
1241 }
1242
    /// Borrows the shared syntax set without touching its reference count.
    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
        &self.syntax_set
    }
1247
    /// Returns a new `Arc` handle to the shared syntax set. Only the reference
    /// count is bumped; the syntax set itself is not copied.
    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
        Arc::clone(&self.syntax_set)
    }
1252
1253 pub fn available_syntaxes(&self) -> Vec<&str> {
1255 self.syntax_set
1256 .syntaxes()
1257 .iter()
1258 .map(|s| s.name.as_str())
1259 .collect()
1260 }
1261
1262 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1269 let mut result: Vec<GrammarInfo> = self
1270 .catalog
1271 .iter()
1272 .map(|entry| GrammarInfo {
1273 name: entry.display_name.clone(),
1274 source: entry.source.clone(),
1275 file_extensions: entry.extensions.clone(),
1276 short_name: entry.short_name.clone(),
1277 })
1278 .collect();
1279 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1280 result
1281 }
1282
    /// Returns the recorded origin information, keyed by syntax name.
    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
        &self.grammar_sources
    }
1287
1288 pub(crate) fn build_grammar_sources_from_syntax_set(
1292 syntax_set: &SyntaxSet,
1293 ) -> HashMap<String, GrammarInfo> {
1294 let mut sources = HashMap::new();
1295 for syntax in syntax_set.syntaxes() {
1296 sources.insert(
1297 syntax.name.clone(),
1298 GrammarInfo {
1299 name: syntax.name.clone(),
1300 source: GrammarSource::BuiltIn,
1301 file_extensions: syntax.file_extensions.clone(),
1302 short_name: None,
1303 },
1304 );
1305 }
1306 sources
1307 }
1308
    /// Test-only accessor for the extra extension -> scope map.
    #[cfg(test)]
    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
        &self.user_extensions
    }
1314
    /// Test-only accessor for the specs of the additionally loaded grammar files.
    #[cfg(test)]
    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
        &self.loaded_grammar_paths
    }
1320
1321 pub fn with_additional_grammars(
1335 base: &GrammarRegistry,
1336 additional: &[GrammarSpec],
1337 ) -> Option<Self> {
1338 tracing::info!(
1339 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1340 additional.len(),
1341 base.syntax_set.syntaxes().len()
1342 );
1343
1344 let mut builder = (*base.syntax_set).clone().into_builder();
1348
1349 let mut user_extensions = base.user_extensions.clone();
1351
1352 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1354
1355 let mut grammar_sources = base.grammar_sources.clone();
1357
1358 for spec in additional {
1360 tracing::info!(
1361 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1362 spec.language,
1363 spec.path,
1364 spec.extensions
1365 );
1366 match Self::load_grammar_file(&spec.path) {
1367 Ok(syntax) => {
1368 let scope = syntax.scope.to_string();
1369 let syntax_name = syntax.name.clone();
1370 tracing::info!(
1371 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1372 syntax_name,
1373 scope
1374 );
1375 builder.add(syntax);
1376 tracing::info!(
1377 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1378 spec.language,
1379 spec.path,
1380 spec.extensions
1381 );
1382 for ext in &spec.extensions {
1384 user_extensions.insert(ext.clone(), scope.clone());
1385 }
1386 grammar_sources.insert(
1388 syntax_name.clone(),
1389 GrammarInfo {
1390 name: syntax_name,
1391 source: GrammarSource::Plugin {
1392 plugin: spec.language.clone(),
1393 path: spec.path.clone(),
1394 },
1395 file_extensions: spec.extensions.clone(),
1396 short_name: None,
1397 },
1398 );
1399 loaded_grammar_paths.push(spec.clone());
1401 }
1402 Err(e) => {
1403 tracing::warn!(
1404 "Failed to load grammar for '{}' from {:?}: {}",
1405 spec.language,
1406 spec.path,
1407 e
1408 );
1409 }
1410 }
1411 }
1412
1413 let mut reg = Self {
1414 syntax_set: Arc::new(builder.build()),
1415 user_extensions,
1416 filename_scopes: base.filename_scopes.clone(),
1417 loaded_grammar_paths,
1418 grammar_sources,
1419 aliases: base.aliases.clone(),
1420 catalog: Vec::new(),
1421 catalog_by_name: HashMap::new(),
1422 catalog_by_extension: HashMap::new(),
1423 catalog_by_filename: HashMap::new(),
1424 applied_language_config: HashMap::new(),
1425 };
1426 reg.rebuild_catalog();
1427 Some(reg)
1428 }
1429
1430 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1436 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1437
1438 match ext {
1439 "sublime-syntax" => {
1440 let content = std::fs::read_to_string(path)
1441 .map_err(|e| format!("Failed to read file: {}", e))?;
1442 SyntaxDefinition::load_from_str(
1443 &content,
1444 true,
1445 path.file_stem().and_then(|s| s.to_str()),
1446 )
1447 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1448 }
1449 _ => Err(format!(
1450 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1451 ext
1452 )),
1453 }
1454 }
1455}
1456
1457impl Default for GrammarRegistry {
1458 fn default() -> Self {
1459 let defaults = SyntaxSet::load_defaults_newlines();
1461 let mut builder = defaults.into_builder();
1462 Self::add_embedded_grammars(&mut builder);
1463 let syntax_set = builder.build();
1464 let filename_scopes = Self::build_filename_scopes();
1465 let extra_extensions = Self::build_extra_extensions();
1466
1467 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1468 registry.populate_built_in_aliases();
1469 registry.rebuild_catalog();
1470 registry
1471 }
1472}
1473
1474#[derive(Debug, Deserialize)]
1477pub struct PackageManifest {
1478 #[serde(default)]
1479 pub contributes: Option<Contributes>,
1480}
1481
1482#[derive(Debug, Deserialize, Default)]
1483pub struct Contributes {
1484 #[serde(default)]
1485 pub languages: Vec<LanguageContribution>,
1486 #[serde(default)]
1487 pub grammars: Vec<GrammarContribution>,
1488}
1489
1490#[derive(Debug, Deserialize)]
1491pub struct LanguageContribution {
1492 pub id: String,
1493 #[serde(default)]
1494 pub extensions: Vec<String>,
1495}
1496
1497#[derive(Debug, Deserialize)]
1498pub struct GrammarContribution {
1499 pub language: String,
1500 #[serde(rename = "scopeName")]
1501 pub scope_name: String,
1502 pub path: String,
1503}
1504
1505#[cfg(test)]
1506mod tests {
1507 use super::*;
1508
1509 #[test]
1510 fn test_empty_registry() {
1511 let registry = GrammarRegistry::empty();
1512 assert!(!registry.available_syntaxes().is_empty());
1514 }
1515
1516 #[test]
1517 fn test_default_registry() {
1518 let registry = GrammarRegistry::default();
1519 assert!(!registry.available_syntaxes().is_empty());
1521 }
1522
1523 #[test]
1524 fn test_find_syntax_for_common_extensions() {
1525 let registry = GrammarRegistry::default();
1526
1527 let test_cases = [
1529 ("test.py", true),
1530 ("test.rs", true),
1531 ("test.js", true),
1532 ("test.json", true),
1533 ("test.md", true),
1534 ("test.html", true),
1535 ("test.css", true),
1536 ("test.unknown_extension_xyz", false),
1537 ];
1538
1539 for (filename, should_exist) in test_cases {
1540 let path = Path::new(filename);
1541 let result = registry.find_syntax_for_file(path);
1542 assert_eq!(
1543 result.is_some(),
1544 should_exist,
1545 "Expected {:?} for {}",
1546 should_exist,
1547 filename
1548 );
1549 }
1550 }
1551
1552 #[test]
1553 fn test_racket_grammar_loaded() {
1554 let registry = GrammarRegistry::default();
1555 for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1556 let result = registry.find_syntax_for_file(Path::new(filename));
1557 assert!(
1558 result.is_some(),
1559 "Racket grammar should be available for {}",
1560 filename
1561 );
1562 let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1563 assert_eq!(entry.display_name, "Racket", "for {}", filename);
1564 }
1565 }
1566
1567 #[test]
1568 fn test_syntax_set_arc() {
1569 let registry = GrammarRegistry::default();
1570 let arc1 = registry.syntax_set_arc();
1571 let arc2 = registry.syntax_set_arc();
1572 assert!(Arc::ptr_eq(&arc1, &arc2));
1574 }
1575
1576 #[test]
1577 fn test_shell_dotfiles_detection() {
1578 let registry = GrammarRegistry::default();
1579
1580 let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1582
1583 for filename in shell_files {
1584 let path = Path::new(filename);
1585 let result = registry.find_syntax_for_file(path);
1586 assert!(
1587 result.is_some(),
1588 "{} should be detected as a syntax",
1589 filename
1590 );
1591 let syntax = result.unwrap();
1592 assert!(
1594 syntax.name.to_lowercase().contains("bash")
1595 || syntax.name.to_lowercase().contains("shell"),
1596 "{} should be detected as shell/bash, got: {}",
1597 filename,
1598 syntax.name
1599 );
1600 }
1601 }
1602
1603 #[test]
1604 fn test_pkgbuild_detection() {
1605 let registry = GrammarRegistry::default();
1606
1607 for filename in ["PKGBUILD", "APKBUILD"] {
1609 let path = Path::new(filename);
1610 let result = registry.find_syntax_for_file(path);
1611 assert!(
1612 result.is_some(),
1613 "{} should be detected as a syntax",
1614 filename
1615 );
1616 let syntax = result.unwrap();
1617 assert!(
1619 syntax.name.to_lowercase().contains("bash")
1620 || syntax.name.to_lowercase().contains("shell"),
1621 "{} should be detected as shell/bash, got: {}",
1622 filename,
1623 syntax.name
1624 );
1625 }
1626 }
1627
1628 #[test]
1629 fn test_find_syntax_with_glob_filenames() {
1630 let mut registry = GrammarRegistry::default();
1631 let mut languages = std::collections::HashMap::new();
1632 languages.insert(
1633 "shell-configs".to_string(),
1634 crate::config::LanguageConfig {
1635 extensions: vec!["sh".to_string()],
1636 filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1637 grammar: "bash".to_string(),
1638 comment_prefix: Some("#".to_string()),
1639 auto_indent: true,
1640 auto_close: None,
1641 auto_surround: None,
1642 textmate_grammar: None,
1643 show_whitespace_tabs: true,
1644 line_wrap: None,
1645 wrap_column: None,
1646 page_view: None,
1647 page_width: None,
1648 use_tabs: None,
1649 tab_size: None,
1650 formatter: None,
1651 format_on_save: false,
1652 on_save: vec![],
1653 word_characters: None,
1654 },
1655 );
1656 registry.apply_language_config(&languages);
1657
1658 assert!(
1659 registry
1660 .find_by_path(Path::new("nftables.conf"), None)
1661 .is_some(),
1662 "*.conf should match nftables.conf"
1663 );
1664 assert!(
1665 registry.find_by_path(Path::new("lfrc"), None).is_some(),
1666 "*rc should match lfrc"
1667 );
1668 let _ = registry.find_by_path(Path::new("randomfile"), None);
1670 }
1671
1672 #[test]
1673 fn test_find_syntax_with_path_glob_filenames() {
1674 let mut registry = GrammarRegistry::default();
1675 let mut languages = std::collections::HashMap::new();
1676 languages.insert(
1677 "shell-configs".to_string(),
1678 crate::config::LanguageConfig {
1679 extensions: vec!["sh".to_string()],
1680 filenames: vec!["/etc/**/rc.*".to_string()],
1681 grammar: "bash".to_string(),
1682 comment_prefix: Some("#".to_string()),
1683 auto_indent: true,
1684 auto_close: None,
1685 auto_surround: None,
1686 textmate_grammar: None,
1687 show_whitespace_tabs: true,
1688 line_wrap: None,
1689 wrap_column: None,
1690 page_view: None,
1691 page_width: None,
1692 use_tabs: None,
1693 tab_size: None,
1694 formatter: None,
1695 format_on_save: false,
1696 on_save: vec![],
1697 word_characters: None,
1698 },
1699 );
1700 registry.apply_language_config(&languages);
1701
1702 assert!(
1703 registry
1704 .find_by_path(Path::new("/etc/rc.conf"), None)
1705 .is_some(),
1706 "/etc/**/rc.* should match /etc/rc.conf"
1707 );
1708 assert!(
1709 registry
1710 .find_by_path(Path::new("/etc/init/rc.local"), None)
1711 .is_some(),
1712 "/etc/**/rc.* should match /etc/init/rc.local"
1713 );
1714 let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1715 }
1716
1717 #[test]
1718 fn test_exact_filename_takes_priority_over_glob() {
1719 let mut registry = GrammarRegistry::default();
1720 let mut languages = std::collections::HashMap::new();
1721
1722 languages.insert(
1724 "custom-lfrc".to_string(),
1725 crate::config::LanguageConfig {
1726 extensions: vec![],
1727 filenames: vec!["lfrc".to_string()],
1728 grammar: "python".to_string(),
1729 comment_prefix: Some("#".to_string()),
1730 auto_indent: true,
1731 auto_close: None,
1732 auto_surround: None,
1733 textmate_grammar: None,
1734 show_whitespace_tabs: true,
1735 line_wrap: None,
1736 wrap_column: None,
1737 page_view: None,
1738 page_width: None,
1739 use_tabs: None,
1740 tab_size: None,
1741 formatter: None,
1742 format_on_save: false,
1743 on_save: vec![],
1744 word_characters: None,
1745 },
1746 );
1747
1748 languages.insert(
1750 "rc-files".to_string(),
1751 crate::config::LanguageConfig {
1752 extensions: vec![],
1753 filenames: vec!["*rc".to_string()],
1754 grammar: "bash".to_string(),
1755 comment_prefix: Some("#".to_string()),
1756 auto_indent: true,
1757 auto_close: None,
1758 auto_surround: None,
1759 textmate_grammar: None,
1760 show_whitespace_tabs: true,
1761 line_wrap: None,
1762 wrap_column: None,
1763 page_view: None,
1764 page_width: None,
1765 use_tabs: None,
1766 tab_size: None,
1767 formatter: None,
1768 format_on_save: false,
1769 on_save: vec![],
1770 word_characters: None,
1771 },
1772 );
1773
1774 registry.apply_language_config(&languages);
1775
1776 let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1778 assert!(
1779 entry.display_name.to_lowercase().contains("python"),
1780 "exact match should win over glob, got: {}",
1781 entry.display_name
1782 );
1783 }
1784
1785 #[test]
1786 fn test_built_in_aliases_resolve() {
1787 let registry = GrammarRegistry::default();
1788
1789 let syntax = registry.find_syntax_by_name("bash");
1791 assert!(syntax.is_some(), "alias 'bash' should resolve");
1792 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1793
1794 let syntax = registry.find_syntax_by_name("cpp");
1796 assert!(syntax.is_some(), "alias 'cpp' should resolve");
1797 assert_eq!(syntax.unwrap().name, "C++");
1798
1799 let syntax = registry.find_syntax_by_name("csharp");
1801 assert!(syntax.is_some(), "alias 'csharp' should resolve");
1802 assert_eq!(syntax.unwrap().name, "C#");
1803
1804 let syntax = registry.find_syntax_by_name("sh");
1806 assert!(syntax.is_some(), "alias 'sh' should resolve");
1807 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1808
1809 let syntax = registry.find_syntax_by_name("proto");
1811 assert!(syntax.is_some(), "alias 'proto' should resolve");
1812 assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1813 }
1814
1815 #[test]
1816 fn test_alias_case_insensitive_input() {
1817 let registry = GrammarRegistry::default();
1818
1819 let syntax = registry.find_syntax_by_name("BASH");
1821 assert!(
1822 syntax.is_some(),
1823 "alias 'BASH' should resolve case-insensitively"
1824 );
1825 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1826
1827 let syntax = registry.find_syntax_by_name("Cpp");
1828 assert!(
1829 syntax.is_some(),
1830 "alias 'Cpp' should resolve case-insensitively"
1831 );
1832 assert_eq!(syntax.unwrap().name, "C++");
1833 }
1834
1835 #[test]
1836 fn test_full_name_still_works() {
1837 let registry = GrammarRegistry::default();
1838
1839 let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1841 assert!(syntax.is_some(), "full name should still resolve");
1842 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1843
1844 let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1846 assert!(
1847 syntax.is_some(),
1848 "case-insensitive full name should resolve"
1849 );
1850 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1851 }
1852
1853 #[test]
1854 fn test_alias_does_not_shadow_full_names() {
1855 let registry = GrammarRegistry::default();
1856
1857 let syntax = registry.find_syntax_by_name("rust");
1859 assert!(syntax.is_some());
1860 assert_eq!(syntax.unwrap().name, "Rust");
1861
1862 let syntax = registry.find_syntax_by_name("go");
1864 assert!(syntax.is_some());
1865 assert_eq!(syntax.unwrap().name, "Go");
1866 }
1867
1868 #[test]
1869 fn test_register_alias_rejects_collision() {
1870 let mut registry = GrammarRegistry::default();
1871
1872 assert!(registry.register_alias("myalias", "Rust"));
1874 assert!(!registry.register_alias("myalias", "Go"));
1875
1876 assert!(registry.register_alias("myalias", "Rust"));
1878 }
1879
1880 #[test]
1881 fn test_register_alias_rejects_nonexistent_target() {
1882 let mut registry = GrammarRegistry::default();
1883 assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1884 }
1885
1886 #[test]
1887 fn test_register_alias_skips_existing_grammar_name() {
1888 let mut registry = GrammarRegistry::default();
1889
1890 assert!(!registry.register_alias("rust", "Rust"));
1892 assert!(registry.find_syntax_by_name("rust").is_some());
1894 }
1895
1896 #[test]
1897 fn test_available_grammar_info_includes_short_names() {
1898 let registry = GrammarRegistry::default();
1899 let infos = registry.available_grammar_info();
1900
1901 let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1902 assert!(bash_info.is_some(), "bash grammar should be in the list");
1903 let bash_info = bash_info.unwrap();
1904 assert!(
1905 bash_info.short_name.is_some(),
1906 "bash grammar should have a short_name"
1907 );
1908 assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1910 }
1911
1912 #[test]
1913 fn test_catalog_contains_each_language_once() {
1914 let registry = GrammarRegistry::default();
1915 let catalog = registry.catalog();
1916
1917 let mut seen = std::collections::HashSet::new();
1919 for entry in catalog {
1920 let key = entry.display_name.to_lowercase();
1921 assert!(
1922 seen.insert(key.clone()),
1923 "duplicate catalog entry for display_name={:?}",
1924 entry.display_name
1925 );
1926 }
1927
1928 let ts = registry
1931 .find_by_name("TypeScript")
1932 .expect("TypeScript must be in the catalog");
1933 assert!(ts.engines.syntect.is_none());
1934 assert_eq!(
1935 ts.engines.tree_sitter,
1936 Some(fresh_languages::Language::TypeScript)
1937 );
1938 assert_eq!(ts.language_id, "typescript");
1939 assert!(ts.extensions.iter().any(|e| e == "ts"));
1940
1941 for name in ["Rust", "Python", "JavaScript"] {
1944 let entry = registry
1945 .find_by_name(name)
1946 .unwrap_or_else(|| panic!("{} must be in the catalog", name));
1947 assert!(
1948 entry.engines.syntect.is_some(),
1949 "{} should have a syntect index",
1950 name
1951 );
1952 assert!(
1953 entry.engines.tree_sitter.is_some(),
1954 "{} should also have a tree-sitter language",
1955 name
1956 );
1957 let by_id = registry
1960 .find_by_name(&entry.language_id)
1961 .expect("language_id should resolve");
1962 assert_eq!(by_id.display_name, entry.display_name);
1963 }
1964 }
1965
1966 #[test]
1967 fn test_catalog_find_by_path_and_extension() {
1968 let registry = GrammarRegistry::default();
1969 let ts = registry
1970 .find_by_path(Path::new("foo.ts"), None)
1971 .expect("foo.ts should resolve");
1972 assert_eq!(ts.display_name, "TypeScript");
1973 let rs = registry.find_by_extension("rs").expect("rs should resolve");
1974 assert_eq!(rs.display_name, "Rust");
1975 }
1976
1977 fn lang_cfg(
1979 grammar: &str,
1980 extensions: &[&str],
1981 filenames: &[&str],
1982 ) -> crate::config::LanguageConfig {
1983 crate::config::LanguageConfig {
1984 extensions: extensions.iter().map(|s| s.to_string()).collect(),
1985 filenames: filenames.iter().map(|s| s.to_string()).collect(),
1986 grammar: grammar.to_string(),
1987 comment_prefix: None,
1988 auto_indent: true,
1989 auto_close: None,
1990 auto_surround: None,
1991 textmate_grammar: None,
1992 show_whitespace_tabs: true,
1993 line_wrap: None,
1994 wrap_column: None,
1995 page_view: None,
1996 page_width: None,
1997 use_tabs: None,
1998 tab_size: None,
1999 formatter: None,
2000 format_on_save: false,
2001 on_save: vec![],
2002 word_characters: None,
2003 }
2004 }
2005
2006 #[test]
2010 fn test_user_alias_resolves_via_find_by_name() {
2011 let mut registry = GrammarRegistry::default();
2012 let mut languages = std::collections::HashMap::new();
2013 languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2014 registry.apply_language_config(&languages);
2015
2016 let entry = registry
2017 .find_by_name("mylang")
2018 .expect("user-declared alias 'mylang' must resolve");
2019 assert_eq!(entry.display_name, "Rust");
2020 }
2021
2022 #[test]
2026 fn test_register_alias_preserves_applied_language_config() {
2027 let mut registry = GrammarRegistry::default();
2028 let mut languages = std::collections::HashMap::new();
2029 languages.insert(
2030 "shell-configs".to_string(),
2031 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2032 );
2033 registry.apply_language_config(&languages);
2034
2035 assert!(registry.find_by_extension("myconf").is_some());
2037 assert!(
2038 registry
2039 .find_by_path(Path::new("foo.myconf"), None)
2040 .is_some(),
2041 "glob should match before register_alias"
2042 );
2043
2044 registry.register_alias("mycustom", "Rust");
2046
2047 assert!(
2048 registry.find_by_extension("myconf").is_some(),
2049 "config extension must survive register_alias"
2050 );
2051 assert!(
2052 registry
2053 .find_by_path(Path::new("foo.myconf"), None)
2054 .is_some(),
2055 "glob must survive register_alias"
2056 );
2057 }
2058
2059 #[test]
2063 fn test_from_syntax_name_preserves_canonical_display_name() {
2064 use crate::primitives::detected_language::DetectedLanguage;
2065 let registry = GrammarRegistry::default();
2066 let languages = std::collections::HashMap::new();
2067
2068 let detected = DetectedLanguage::from_syntax_name("BASH", ®istry, &languages)
2069 .expect("BASH should resolve via alias");
2070 assert_eq!(
2071 detected.display_name, "Bourne Again Shell (bash)",
2072 "display_name must be canonical, not user-typed"
2073 );
2074 }
2075
2076 #[test]
2080 fn test_config_only_language_appears_in_catalog() {
2081 let mut registry = GrammarRegistry::default();
2082 let mut languages = std::collections::HashMap::new();
2083 languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2085 registry.apply_language_config(&languages);
2086
2087 let entry = registry
2088 .find_by_name("fish")
2089 .expect("fish should be in the catalog after apply_language_config");
2090 assert!(entry.engines.syntect.is_none());
2091 assert!(entry.engines.tree_sitter.is_none());
2092 assert_eq!(entry.language_id, "fish");
2093 assert!(entry.extensions.iter().any(|e| e == "fish"));
2094 }
2095
2096 #[test]
2101 fn test_config_extension_overrides_builtin() {
2102 let mut registry = GrammarRegistry::default();
2103 assert_eq!(
2105 registry.find_by_extension("js").unwrap().display_name,
2106 "JavaScript"
2107 );
2108
2109 let mut languages = std::collections::HashMap::new();
2110 languages.insert(
2111 "ts-overlay".to_string(),
2112 lang_cfg("TypeScript", &["js"], &[]),
2113 );
2114 registry.apply_language_config(&languages);
2115
2116 assert_eq!(
2117 registry.find_by_extension("js").unwrap().display_name,
2118 "TypeScript",
2119 "user-config extension must win over built-in"
2120 );
2121 }
2122
2123 #[test]
2130 fn test_bare_filename_resolves_via_find_by_path() {
2131 let registry = GrammarRegistry::default();
2132 for (filename, expected_substr) in [
2133 ("Gemfile", "ruby"),
2134 ("Rakefile", "ruby"),
2135 ("Vagrantfile", "ruby"),
2136 ("Makefile", "makefile"),
2137 ("GNUmakefile", "makefile"),
2138 ] {
2139 let entry = registry
2140 .find_by_path(Path::new(filename), None)
2141 .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2142 assert!(
2143 entry.display_name.to_lowercase().contains(expected_substr),
2144 "{} should resolve to {} grammar, got {}",
2145 filename,
2146 expected_substr,
2147 entry.display_name
2148 );
2149 }
2150 }
2151
2152 #[test]
2157 fn test_jsx_resolves_to_javascript() {
2158 let registry = GrammarRegistry::default();
2159 let entry = registry
2160 .find_by_path(Path::new("foo.jsx"), None)
2161 .expect("foo.jsx must resolve");
2162 assert_eq!(entry.display_name, "JavaScript");
2163 }
2164
2165 #[test]
2170 fn test_rebuild_catalog_replays_language_config() {
2171 let mut registry = GrammarRegistry::default();
2172 let mut languages = std::collections::HashMap::new();
2173 languages.insert(
2174 "myshell".to_string(),
2175 lang_cfg("bash", &["myext"], &["*.myglob"]),
2176 );
2177 registry.apply_language_config(&languages);
2178 assert!(registry.find_by_extension("myext").is_some());
2179 assert!(registry
2180 .find_by_path(Path::new("foo.myglob"), None)
2181 .is_some());
2182
2183 registry.rebuild_catalog();
2186 assert!(
2187 registry.find_by_extension("myext").is_some(),
2188 "rebuild_catalog must replay applied user config"
2189 );
2190 assert!(
2191 registry
2192 .find_by_path(Path::new("foo.myglob"), None)
2193 .is_some(),
2194 "rebuild_catalog must replay user globs"
2195 );
2196 }
2197
2198 #[test]
2201 fn test_apply_language_config_idempotent() {
2202 let mut registry = GrammarRegistry::default();
2203 let mut languages = std::collections::HashMap::new();
2204 languages.insert(
2205 "shell-cfg".to_string(),
2206 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2207 );
2208
2209 registry.apply_language_config(&languages);
2210 let first_extensions = registry
2211 .find_by_name("bash")
2212 .unwrap()
2213 .extensions
2214 .iter()
2215 .filter(|e| e == &"myconf")
2216 .count();
2217 let first_globs = registry
2218 .find_by_name("bash")
2219 .unwrap()
2220 .filename_globs
2221 .iter()
2222 .filter(|g| g == &"*.myconf")
2223 .count();
2224 assert_eq!(first_extensions, 1);
2225 assert_eq!(first_globs, 1);
2226
2227 registry.apply_language_config(&languages);
2229 let second_extensions = registry
2230 .find_by_name("bash")
2231 .unwrap()
2232 .extensions
2233 .iter()
2234 .filter(|e| e == &"myconf")
2235 .count();
2236 let second_globs = registry
2237 .find_by_name("bash")
2238 .unwrap()
2239 .filename_globs
2240 .iter()
2241 .filter(|g| g == &"*.myconf")
2242 .count();
2243 assert_eq!(second_extensions, 1, "extensions must not duplicate");
2244 assert_eq!(second_globs, 1, "globs must not duplicate");
2245 }
2246
2247 #[test]
2251 fn test_tree_sitter_bridge() {
2252 assert_eq!(
2253 tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2254 Some(fresh_languages::Language::Bash)
2255 );
2256 assert_eq!(
2257 tree_sitter_for_syntect_name("Rust"),
2258 Some(fresh_languages::Language::Rust)
2259 );
2260 assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2262 assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2264 }
2265}