1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file to load on top of an existing registry.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language identifier this grammar is loaded for (used in log output).
    pub language: String,
    /// Location of the grammar file handed to the grammar loader.
    pub path: PathBuf,
    /// File extensions that should be mapped to the loaded grammar's scope.
    pub extensions: Vec<String>,
}
30
/// Where a grammar came from.
///
/// Serialized as an internally tagged enum: the variant is stored in a
/// `"type"` field using the kebab-case names given by the `rename` attributes.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Grammar shipped with the application.
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Grammar loaded from a user-provided file at `path`.
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Grammar provided by the language pack `name`, located at `path`.
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Grammar provided by the bundle `name`, located at `path`.
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Grammar registered by the plugin `plugin`, located at `path`.
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of one grammar: its name, origin and extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Display name of the grammar.
    pub name: String,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias; omitted from serialized output when `None` and
    /// defaulted to `None` when missing on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit syntect-name -> tree-sitter-language overrides, consulted by
/// `tree_sitter_for_syntect_name` before it falls back to comparing the
/// tree-sitter display name.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// Which highlighting engines can handle a catalog entry.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index into the registry's `syntax_set.syntaxes()` slice, when a
    /// syntect grammar backs this entry.
    pub syntect: Option<usize>,
    /// Tree-sitter language backing this entry, when one exists.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One language in the unified grammar catalog.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Name shown to users (the syntect syntax name or tree-sitter display name).
    pub display_name: String,
    /// Tree-sitter language id when available, otherwise the lowercased
    /// display name (or the config key for config-created entries).
    pub language_id: String,
    /// Shortest known alias for this grammar, if any.
    pub short_name: Option<String>,
    /// File extensions that resolve to this entry.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (filename or path patterns) that resolve to this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Engines able to highlight this entry.
    pub engines: GrammarEngines,
}
142
// Sublime-syntax grammar sources embedded into the binary at compile time
// via `include_str!`. `add_embedded_grammars` parses each of them and adds
// the result to a `SyntaxSetBuilder`.

pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

// Grammars below are loaded through the table-driven loop in
// `add_embedded_grammars`.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");
243
244impl std::fmt::Debug for GrammarRegistry {
249 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
250 f.debug_struct("GrammarRegistry")
251 .field("syntax_count", &self.syntax_set.syntaxes().len())
252 .finish()
253 }
254}
255
/// Registry of syntect grammars plus a unified catalog that also covers
/// tree-sitter languages, with lookup indices by name, extension and filename.
pub struct GrammarRegistry {
    /// Shared syntect syntax set; catalog entries index into its `syntaxes()`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra extension -> scope mappings added on top of the grammars' own.
    user_extensions: HashMap<String, String>,
    /// Exact filename -> scope mappings (e.g. `Dockerfile`).
    filename_scopes: HashMap<String, String>,
    /// Specs of grammar files carried along when a derived registry is built.
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Provenance info keyed by syntax name.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lowercased alias -> exact syntax name.
    aliases: HashMap<String, String>,
    /// Unified list of grammar entries, rebuilt by `rebuild_catalog`.
    catalog: Vec<GrammarEntry>,
    /// Lowercased display name / language id / short name -> `catalog` index.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased extension -> `catalog` index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact filename (and raw extension string) -> `catalog` index.
    catalog_by_filename: HashMap<String, usize>,
    /// Last config passed to `apply_language_config`; re-applied after every
    /// catalog rebuild.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
    /// Counter bumped (wrapping) whenever the catalog is rebuilt or a language
    /// config is applied.
    catalog_gen: u64,
}
292
293impl GrammarRegistry {
294 pub(crate) fn new(
299 syntax_set: SyntaxSet,
300 user_extensions: HashMap<String, String>,
301 filename_scopes: HashMap<String, String>,
302 ) -> Self {
303 Self::new_with_loaded_paths(
304 syntax_set,
305 user_extensions,
306 filename_scopes,
307 Vec::new(),
308 HashMap::new(),
309 )
310 }
311
312 pub(crate) fn new_with_loaded_paths(
317 syntax_set: SyntaxSet,
318 user_extensions: HashMap<String, String>,
319 filename_scopes: HashMap<String, String>,
320 loaded_grammar_paths: Vec<GrammarSpec>,
321 grammar_sources: HashMap<String, GrammarInfo>,
322 ) -> Self {
323 let mut reg = Self {
324 syntax_set: Arc::new(syntax_set),
325 user_extensions,
326 filename_scopes,
327 loaded_grammar_paths,
328 grammar_sources,
329 aliases: HashMap::new(),
330 catalog: Vec::new(),
331 catalog_by_name: HashMap::new(),
332 catalog_by_extension: HashMap::new(),
333 catalog_by_filename: HashMap::new(),
334 applied_language_config: HashMap::new(),
335 catalog_gen: 0,
336 };
337 reg.rebuild_catalog();
338 reg
339 }
340
341 pub fn empty() -> Arc<Self> {
343 let mut builder = SyntaxSetBuilder::new();
344 builder.add_plain_text_syntax();
345 let mut reg = Self {
346 syntax_set: Arc::new(builder.build()),
347 user_extensions: HashMap::new(),
348 filename_scopes: HashMap::new(),
349 loaded_grammar_paths: Vec::new(),
350 grammar_sources: HashMap::new(),
351 aliases: HashMap::new(),
352 catalog: Vec::new(),
353 catalog_by_name: HashMap::new(),
354 catalog_by_extension: HashMap::new(),
355 catalog_by_filename: HashMap::new(),
356 applied_language_config: HashMap::new(),
357 catalog_gen: 0,
358 };
359 reg.rebuild_catalog();
360 Arc::new(reg)
361 }
362
363 pub fn defaults_only() -> Arc<Self> {
370 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
374 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
375 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
376 ))
377 .expect("Failed to load pre-compiled syntax packdump");
378 tracing::info!(
379 "defaults_only: loaded ({} syntaxes)",
380 syntax_set.syntaxes().len()
381 );
382 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
383 let filename_scopes = Self::build_filename_scopes();
384 let extra_extensions = Self::build_extra_extensions();
385 let mut registry = Self {
386 syntax_set: Arc::new(syntax_set),
387 user_extensions: extra_extensions,
388 filename_scopes,
389 loaded_grammar_paths: Vec::new(),
390 grammar_sources,
391 aliases: HashMap::new(),
392 catalog: Vec::new(),
393 catalog_by_name: HashMap::new(),
394 catalog_by_extension: HashMap::new(),
395 catalog_by_filename: HashMap::new(),
396 applied_language_config: HashMap::new(),
397 catalog_gen: 0,
398 };
399 registry.populate_built_in_aliases();
400 registry.rebuild_catalog();
401 Arc::new(registry)
402 }
403
404 pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
409 let mut map = HashMap::new();
410
411 let js_scope = "source.js".to_string();
413 map.insert("cjs".to_string(), js_scope.clone());
414 map.insert("mjs".to_string(), js_scope);
415
416 map
420 }
421
422 pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
424 let mut map = HashMap::new();
425
426 let shell_scope = "source.shell.bash".to_string();
428 for filename in [
429 ".zshrc",
430 ".zprofile",
431 ".zshenv",
432 ".zlogin",
433 ".zlogout",
434 ".bash_aliases",
435 "PKGBUILD",
438 "APKBUILD",
439 ] {
440 map.insert(filename.to_string(), shell_scope.clone());
441 }
442
443 let git_rebase_scope = "source.git-rebase-todo".to_string();
445 map.insert("git-rebase-todo".to_string(), git_rebase_scope);
446
447 let git_commit_scope = "source.git-commit".to_string();
449 for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
450 map.insert(filename.to_string(), git_commit_scope.clone());
451 }
452
453 let gitignore_scope = "source.gitignore".to_string();
455 for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
456 map.insert(filename.to_string(), gitignore_scope.clone());
457 }
458
459 let gitconfig_scope = "source.gitconfig".to_string();
461 for filename in [".gitconfig", ".gitmodules"] {
462 map.insert(filename.to_string(), gitconfig_scope.clone());
463 }
464
465 let gitattributes_scope = "source.gitattributes".to_string();
467 map.insert(".gitattributes".to_string(), gitattributes_scope);
468
469 let groovy_scope = "source.groovy".to_string();
471 map.insert("Jenkinsfile".to_string(), groovy_scope);
472
473 let ruby_scope = "source.ruby".to_string();
476 map.insert("Brewfile".to_string(), ruby_scope);
477
478 let dockerfile_scope = "source.dockerfile".to_string();
480 map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
481 map.insert("Containerfile".to_string(), dockerfile_scope.clone());
482 map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
484 map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
485 map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
486 map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
487
488 let cmake_scope = "source.cmake".to_string();
490 map.insert("CMakeLists.txt".to_string(), cmake_scope);
491
492 let starlark_scope = "source.starlark".to_string();
494 map.insert("BUILD".to_string(), starlark_scope.clone());
495 map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
496 map.insert("WORKSPACE".to_string(), starlark_scope.clone());
497 map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
498 map.insert("Tiltfile".to_string(), starlark_scope);
499
500 let justfile_scope = "source.justfile".to_string();
502 map.insert("justfile".to_string(), justfile_scope.clone());
503 map.insert("Justfile".to_string(), justfile_scope.clone());
504 map.insert(".justfile".to_string(), justfile_scope);
505
506 let ini_scope = "source.ini".to_string();
508 map.insert(".editorconfig".to_string(), ini_scope);
509
510 let earthfile_scope = "source.earthfile".to_string();
512 map.insert("Earthfile".to_string(), earthfile_scope);
513
514 let hyprlang_scope = "source.hyprlang".to_string();
516 map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
517 map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
518 map.insert("hyprlock.conf".to_string(), hyprlang_scope);
519
520 let gomod_scope = "source.gomod".to_string();
522 map.insert("go.mod".to_string(), gomod_scope.clone());
523 map.insert("go.sum".to_string(), gomod_scope);
524
525 map
526 }
527
528 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
530 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
532 Ok(syntax) => {
533 builder.add(syntax);
534 tracing::debug!("Loaded embedded TOML grammar");
535 }
536 Err(e) => {
537 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
538 }
539 }
540
541 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
543 Ok(syntax) => {
544 builder.add(syntax);
545 tracing::debug!("Loaded embedded Odin grammar");
546 }
547 Err(e) => {
548 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
549 }
550 }
551
552 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
554 Ok(syntax) => {
555 builder.add(syntax);
556 tracing::debug!("Loaded embedded Zig grammar");
557 }
558 Err(e) => {
559 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
560 }
561 }
562
563 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
565 Ok(syntax) => {
566 builder.add(syntax);
567 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
568 }
569 Err(e) => {
570 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
571 }
572 }
573
574 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
576 {
577 Ok(syntax) => {
578 builder.add(syntax);
579 tracing::debug!("Loaded embedded Git Commit Message grammar");
580 }
581 Err(e) => {
582 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
583 }
584 }
585
586 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
588 Ok(syntax) => {
589 builder.add(syntax);
590 tracing::debug!("Loaded embedded Gitignore grammar");
591 }
592 Err(e) => {
593 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
594 }
595 }
596
597 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
599 Ok(syntax) => {
600 builder.add(syntax);
601 tracing::debug!("Loaded embedded Git Config grammar");
602 }
603 Err(e) => {
604 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
605 }
606 }
607
608 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
610 Ok(syntax) => {
611 builder.add(syntax);
612 tracing::debug!("Loaded embedded Git Attributes grammar");
613 }
614 Err(e) => {
615 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
616 }
617 }
618
619 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
621 Ok(syntax) => {
622 builder.add(syntax);
623 tracing::debug!("Loaded embedded Typst grammar");
624 }
625 Err(e) => {
626 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
627 }
628 }
629
630 let additional_grammars: &[(&str, &str)] = &[
632 (DOCKERFILE_GRAMMAR, "Dockerfile"),
633 (INI_GRAMMAR, "INI"),
634 (CMAKE_GRAMMAR, "CMake"),
635 (SCSS_GRAMMAR, "SCSS"),
636 (LESS_GRAMMAR, "LESS"),
637 (POWERSHELL_GRAMMAR, "PowerShell"),
638 (KOTLIN_GRAMMAR, "Kotlin"),
639 (SWIFT_GRAMMAR, "Swift"),
640 (DART_GRAMMAR, "Dart"),
641 (ELIXIR_GRAMMAR, "Elixir"),
642 (FSHARP_GRAMMAR, "FSharp"),
643 (NIX_GRAMMAR, "Nix"),
644 (HCL_GRAMMAR, "HCL"),
645 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
646 (GRAPHQL_GRAMMAR, "GraphQL"),
647 (JULIA_GRAMMAR, "Julia"),
648 (NIM_GRAMMAR, "Nim"),
649 (GLEAM_GRAMMAR, "Gleam"),
650 (VLANG_GRAMMAR, "V"),
651 (SOLIDITY_GRAMMAR, "Solidity"),
652 (KDL_GRAMMAR, "KDL"),
653 (NUSHELL_GRAMMAR, "Nushell"),
654 (STARLARK_GRAMMAR, "Starlark"),
655 (JUSTFILE_GRAMMAR, "Justfile"),
656 (EARTHFILE_GRAMMAR, "Earthfile"),
657 (GOMOD_GRAMMAR, "Go Module"),
658 (VUE_GRAMMAR, "Vue"),
659 (SVELTE_GRAMMAR, "Svelte"),
660 (ASTRO_GRAMMAR, "Astro"),
661 (HYPRLANG_GRAMMAR, "Hyprlang"),
662 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
663 (RACKET_GRAMMAR, "Racket"),
664 (VERILOG_GRAMMAR, "Verilog"),
665 (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
666 (VHDL_GRAMMAR, "VHDL"),
667 ];
668
669 for (grammar_str, name) in additional_grammars {
670 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
671 Ok(syntax) => {
672 builder.add(syntax);
673 tracing::debug!("Loaded embedded {} grammar", name);
674 }
675 Err(e) => {
676 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
677 }
678 }
679 }
680 }
681
682 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
688 let entry = self.find_by_path(path, None)?;
689 entry
690 .engines
691 .syntect
692 .map(|i| &self.syntax_set.syntaxes()[i])
693 }
694
695 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
703 if let Some(entry) = self.find_by_name(name) {
704 if let Some(idx) = entry.engines.syntect {
705 return Some(&self.syntax_set.syntaxes()[idx]);
706 }
707 }
708 self.syntax_set.find_syntax_by_name(name)
712 }
713
714 fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
723 vec![
724 ("bash", "Bourne Again Shell (bash)"),
726 ("shell", "Bourne Again Shell (bash)"),
727 ("sh", "Bourne Again Shell (bash)"),
728 ("c++", "C++"),
729 ("cpp", "C++"),
730 ("csharp", "C#"),
731 ("objc", "Objective-C"),
732 ("objcpp", "Objective-C++"),
733 ("regex", "Regular Expressions (Python)"),
734 ("regexp", "Regular Expressions (Python)"),
735 ("proto", "Protocol Buffers"),
737 ("protobuf", "Protocol Buffers"),
738 ("gomod", "Go Module"),
739 ("git-rebase", "Git Rebase Todo"),
740 ("git-commit", "Git Commit Message"),
741 ("git-config", "Git Config"),
742 ("git-attributes", "Git Attributes"),
743 ("gitignore", "Gitignore"),
744 ("fsharp", "FSharp"),
745 ("f#", "FSharp"),
746 ("terraform", "HCL"),
747 ("tf", "HCL"),
748 ("ts", "TypeScript"),
749 ("js", "JavaScript"),
750 ("py", "Python"),
751 ("rb", "Ruby"),
752 ("rs", "Rust"),
753 ("md", "Markdown"),
754 ("yml", "YAML"),
755 ("dockerfile", "Dockerfile"),
756 ]
757 }
758
759 pub(crate) fn populate_built_in_aliases(&mut self) {
766 for (short, full) in Self::built_in_aliases() {
767 self.register_alias_inner(short, full, true);
768 }
769 self.rebuild_catalog();
770 }
771
772 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
782 if !self.register_alias_inner(short_name, full_name, false) {
783 return false;
784 }
785 let short_lower = short_name.to_lowercase();
786 let full_lower = full_name.to_lowercase();
787 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
788 self.catalog_by_name
789 .entry(short_lower.clone())
790 .or_insert(idx);
791 let entry = &mut self.catalog[idx];
792 let replace = match &entry.short_name {
793 None => true,
794 Some(existing) => short_name.len() < existing.len(),
795 };
796 if replace {
797 entry.short_name = Some(short_lower);
798 }
799 }
800 true
801 }
802
803 fn register_alias_inner(
804 &mut self,
805 short_name: &str,
806 full_name: &str,
807 is_built_in: bool,
808 ) -> bool {
809 let short_lower = short_name.to_lowercase();
810
811 let target_exists = self
813 .syntax_set
814 .syntaxes()
815 .iter()
816 .any(|s| s.name.eq_ignore_ascii_case(full_name));
817 if !target_exists {
818 if tree_sitter_for_syntect_name(full_name).is_some() {
822 return false;
823 }
824 if is_built_in {
825 tracing::warn!(
828 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
829 short_name, full_name
830 );
831 } else {
832 tracing::warn!(
833 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
834 short_name,
835 full_name
836 );
837 }
838 return false;
839 }
840
841 let collides_with_full_name = self
843 .syntax_set
844 .syntaxes()
845 .iter()
846 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
847 if collides_with_full_name {
848 tracing::debug!(
852 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
853 short_name
854 );
855 return false;
856 }
857
858 if let Some(existing_target) = self.aliases.get(&short_lower) {
860 if existing_target.eq_ignore_ascii_case(full_name) {
861 return true;
863 }
864 let msg = format!(
865 "Alias '{}' already maps to '{}', cannot remap to '{}'",
866 short_name, existing_target, full_name
867 );
868 if is_built_in {
869 panic!("[grammar-alias] Built-in alias collision: {}", msg);
870 } else {
871 tracing::warn!("[grammar-alias] {}", msg);
872 return false;
873 }
874 }
875
876 let exact_name = self
878 .syntax_set
879 .syntaxes()
880 .iter()
881 .find(|s| s.name.eq_ignore_ascii_case(full_name))
882 .map(|s| s.name.clone())
883 .unwrap();
884
885 self.aliases.insert(short_lower, exact_name);
886 true
887 }
888
889 pub(crate) fn rebuild_catalog(&mut self) {
904 let mut short_by_full: HashMap<String, String> = HashMap::new();
911 let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
912 let key = full.to_lowercase();
913 let keep = match map.get(&key) {
914 None => true,
915 Some(existing) => short.len() < existing.len(),
916 };
917 if keep {
918 map.insert(key, short.to_string());
919 }
920 };
921 for (short, full) in Self::built_in_aliases() {
922 record(&mut short_by_full, short, full);
923 }
924 for (short, full) in &self.aliases {
925 record(&mut short_by_full, short, full);
926 }
927
928 let derive_language_id =
929 |display_name: &str| -> (String, Option<fresh_languages::Language>) {
930 let ts = tree_sitter_for_syntect_name(display_name);
931 let id = ts
932 .map(|l| l.id().to_string())
933 .unwrap_or_else(|| display_name.to_lowercase());
934 (id, ts)
935 };
936
937 let mut catalog: Vec<GrammarEntry> = Vec::new();
938 let mut scope_to_index: HashMap<String, usize> = HashMap::new();
939
940 for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
961 if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
962 continue;
963 }
964 let (language_id, tree_sitter) = derive_language_id(&syntax.name);
965 let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
966 let source = self
967 .grammar_sources
968 .get(&syntax.name)
969 .map(|info| info.source.clone())
970 .unwrap_or(GrammarSource::BuiltIn);
971 let entry_index = catalog.len();
972 scope_to_index.insert(syntax.scope.to_string(), entry_index);
973
974 let mut extensions = syntax.file_extensions.clone();
980 if let Some(lang) = tree_sitter {
981 for ext in lang.extensions() {
982 let ext = ext.to_string();
983 if !extensions.iter().any(|e| e == &ext) {
984 extensions.push(ext);
985 }
986 }
987 }
988
989 catalog.push(GrammarEntry {
990 display_name: syntax.name.clone(),
991 language_id,
992 short_name,
993 extensions,
994 filenames: Vec::new(),
995 filename_globs: Vec::new(),
996 source,
997 engines: GrammarEngines {
998 syntect: Some(idx),
999 tree_sitter,
1000 },
1001 });
1002 }
1003
1004 for (filename, scope) in &self.filename_scopes {
1006 if let Some(&idx) = scope_to_index.get(scope) {
1007 if !catalog[idx].filenames.iter().any(|f| f == filename) {
1008 catalog[idx].filenames.push(filename.clone());
1009 }
1010 }
1011 }
1012
1013 for (ext, scope) in &self.user_extensions {
1015 if let Some(&idx) = scope_to_index.get(scope) {
1016 if !catalog[idx].extensions.iter().any(|e| e == ext) {
1017 catalog[idx].extensions.push(ext.clone());
1018 }
1019 }
1020 }
1021
1022 let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1027 std::collections::HashSet::new();
1028 for entry in &catalog {
1029 if let Some(lang) = entry.engines.tree_sitter {
1030 ts_covered.insert(lang);
1031 }
1032 }
1033 for lang in fresh_languages::Language::all() {
1034 if ts_covered.contains(lang) {
1035 continue;
1036 }
1037 let display_name = lang.display_name().to_string();
1038 let language_id = lang.id().to_string();
1039 let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1040 let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1041 catalog.push(GrammarEntry {
1042 display_name,
1043 language_id,
1044 short_name,
1045 extensions,
1046 filenames: Vec::new(),
1047 filename_globs: Vec::new(),
1048 source: GrammarSource::BuiltIn,
1049 engines: GrammarEngines {
1050 syntect: None,
1051 tree_sitter: Some(*lang),
1052 },
1053 });
1054 }
1055
1056 let mut by_name: HashMap<String, usize> = HashMap::new();
1064 let mut by_extension: HashMap<String, usize> = HashMap::new();
1065 let mut by_filename: HashMap<String, usize> = HashMap::new();
1066 for (idx, entry) in catalog.iter().enumerate() {
1067 by_name.insert(entry.display_name.to_lowercase(), idx);
1068 by_name.insert(entry.language_id.to_lowercase(), idx);
1069 if let Some(short) = &entry.short_name {
1070 by_name.insert(short.to_lowercase(), idx);
1071 }
1072 for ext in &entry.extensions {
1073 by_extension.entry(ext.to_lowercase()).or_insert(idx);
1074 by_filename.entry(ext.clone()).or_insert(idx);
1075 }
1076 for filename in &entry.filenames {
1077 by_filename.entry(filename.clone()).or_insert(idx);
1078 }
1079 }
1080
1081 self.catalog = catalog;
1082 self.catalog_by_name = by_name;
1083 self.catalog_by_extension = by_extension;
1084 self.catalog_by_filename = by_filename;
1085
1086 if !self.applied_language_config.is_empty() {
1090 let cfg = std::mem::take(&mut self.applied_language_config);
1091 self.apply_language_config_inner(&cfg);
1092 self.applied_language_config = cfg;
1093 }
1094 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1095 }
1096
1097 pub fn catalog(&self) -> &[GrammarEntry] {
1099 &self.catalog
1100 }
1101
    /// Returns the current catalog generation counter, which is advanced
    /// (wrapping) each time the catalog is rebuilt or a language config is
    /// applied.
    pub fn catalog_gen(&self) -> u64 {
        self.catalog_gen
    }
1108
1109 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1115 self.catalog_by_name
1116 .get(&name.to_lowercase())
1117 .map(|&idx| &self.catalog[idx])
1118 }
1119
1120 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1141 let filename = path.file_name().and_then(|n| n.to_str());
1142 let path_str = path.to_str().unwrap_or("");
1143
1144 if let Some(name) = filename {
1145 if let Some(&idx) = self.catalog_by_filename.get(name) {
1146 return Some(&self.catalog[idx]);
1147 }
1148 }
1149
1150 if let Some(name) = filename {
1152 for entry in &self.catalog {
1153 for pattern in &entry.filename_globs {
1154 let matched = if is_path_pattern(pattern) {
1155 path_glob_matches(pattern, path_str)
1156 } else {
1157 filename_glob_matches(pattern, name)
1158 };
1159 if matched {
1160 return Some(entry);
1161 }
1162 }
1163 }
1164 }
1165
1166 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1167 if let Some(entry) = self.find_by_extension(ext) {
1168 return Some(entry);
1169 }
1170 }
1171
1172 let line = first_line?;
1177 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1178 self.find_by_name(&syntax.name)
1179 }
1180
1181 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1183 self.catalog_by_extension
1184 .get(&ext.to_lowercase())
1185 .map(|&idx| &self.catalog[idx])
1186 }
1187
    /// Applies per-language configuration on top of the current catalog.
    ///
    /// The config is stored so that `rebuild_catalog` can re-apply it after a
    /// rebuild, then merged into the catalog, and the catalog generation is
    /// advanced.
    pub fn apply_language_config(
        &mut self,
        languages: &HashMap<String, crate::config::LanguageConfig>,
    ) {
        // Remember the config: a later rebuild starts from scratch and needs it.
        self.applied_language_config = languages.clone();
        self.apply_language_config_inner(languages);
        self.catalog_gen = self.catalog_gen.wrapping_add(1);
    }
1208
1209 fn apply_language_config_inner(
1214 &mut self,
1215 languages: &HashMap<String, crate::config::LanguageConfig>,
1216 ) {
1217 for (lang_id, lang_cfg) in languages {
1218 let grammar_name = if lang_cfg.grammar.is_empty() {
1219 lang_id.as_str()
1220 } else {
1221 lang_cfg.grammar.as_str()
1222 };
1223
1224 let idx = self
1226 .catalog_by_name
1227 .get(&grammar_name.to_lowercase())
1228 .copied()
1229 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1230 .unwrap_or_else(|| {
1231 let idx = self.catalog.len();
1232 self.catalog.push(GrammarEntry {
1233 display_name: lang_id.clone(),
1234 language_id: lang_id.clone(),
1235 short_name: None,
1236 extensions: Vec::new(),
1237 filenames: Vec::new(),
1238 filename_globs: Vec::new(),
1239 source: GrammarSource::BuiltIn,
1240 engines: GrammarEngines::default(),
1241 });
1242 idx
1243 });
1244
1245 self.catalog_by_name
1250 .entry(lang_id.to_lowercase())
1251 .or_insert(idx);
1252
1253 for ext in &lang_cfg.extensions {
1254 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1255 self.catalog[idx].extensions.push(ext.clone());
1256 }
1257 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1259 }
1260 for filename in &lang_cfg.filenames {
1261 if is_glob_pattern(filename) {
1262 if !self.catalog[idx]
1263 .filename_globs
1264 .iter()
1265 .any(|f| f == filename)
1266 {
1267 self.catalog[idx].filename_globs.push(filename.clone());
1268 }
1269 } else {
1270 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1271 self.catalog[idx].filenames.push(filename.clone());
1272 }
1273 self.catalog_by_filename.insert(filename.clone(), idx);
1274 }
1275 }
1276 }
1277 }
1278
    /// Borrows the shared syntect syntax set.
    pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
        &self.syntax_set
    }
1283
    /// Returns a new `Arc` handle to the shared syntax set (reference-count
    /// bump only; the syntax set itself is not copied).
    pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
        Arc::clone(&self.syntax_set)
    }
1288
1289 pub fn available_syntaxes(&self) -> Vec<&str> {
1291 self.syntax_set
1292 .syntaxes()
1293 .iter()
1294 .map(|s| s.name.as_str())
1295 .collect()
1296 }
1297
1298 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1305 let mut result: Vec<GrammarInfo> = self
1306 .catalog
1307 .iter()
1308 .map(|entry| GrammarInfo {
1309 name: entry.display_name.clone(),
1310 source: entry.source.clone(),
1311 file_extensions: entry.extensions.clone(),
1312 short_name: entry.short_name.clone(),
1313 })
1314 .collect();
1315 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1316 result
1317 }
1318
    /// Borrows the provenance table, keyed by syntax name.
    pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
        &self.grammar_sources
    }
1323
1324 pub(crate) fn build_grammar_sources_from_syntax_set(
1328 syntax_set: &SyntaxSet,
1329 ) -> HashMap<String, GrammarInfo> {
1330 let mut sources = HashMap::new();
1331 for syntax in syntax_set.syntaxes() {
1332 sources.insert(
1333 syntax.name.clone(),
1334 GrammarInfo {
1335 name: syntax.name.clone(),
1336 source: GrammarSource::BuiltIn,
1337 file_extensions: syntax.file_extensions.clone(),
1338 short_name: None,
1339 },
1340 );
1341 }
1342 sources
1343 }
1344
1345 #[cfg(test)]
1347 pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1348 &self.user_extensions
1349 }
1350
1351 #[cfg(test)]
1353 pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1354 &self.loaded_grammar_paths
1355 }
1356
1357 pub fn with_additional_grammars(
1371 base: &GrammarRegistry,
1372 additional: &[GrammarSpec],
1373 ) -> Option<Self> {
1374 tracing::info!(
1375 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1376 additional.len(),
1377 base.syntax_set.syntaxes().len()
1378 );
1379
1380 let mut builder = (*base.syntax_set).clone().into_builder();
1384
1385 let mut user_extensions = base.user_extensions.clone();
1387
1388 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1390
1391 let mut grammar_sources = base.grammar_sources.clone();
1393
1394 for spec in additional {
1396 tracing::info!(
1397 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1398 spec.language,
1399 spec.path,
1400 spec.extensions
1401 );
1402 match Self::load_grammar_file(&spec.path) {
1403 Ok(syntax) => {
1404 let scope = syntax.scope.to_string();
1405 let syntax_name = syntax.name.clone();
1406 tracing::info!(
1407 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1408 syntax_name,
1409 scope
1410 );
1411 builder.add(syntax);
1412 tracing::info!(
1413 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1414 spec.language,
1415 spec.path,
1416 spec.extensions
1417 );
1418 for ext in &spec.extensions {
1420 user_extensions.insert(ext.clone(), scope.clone());
1421 }
1422 grammar_sources.insert(
1424 syntax_name.clone(),
1425 GrammarInfo {
1426 name: syntax_name,
1427 source: GrammarSource::Plugin {
1428 plugin: spec.language.clone(),
1429 path: spec.path.clone(),
1430 },
1431 file_extensions: spec.extensions.clone(),
1432 short_name: None,
1433 },
1434 );
1435 loaded_grammar_paths.push(spec.clone());
1437 }
1438 Err(e) => {
1439 tracing::warn!(
1440 "Failed to load grammar for '{}' from {:?}: {}",
1441 spec.language,
1442 spec.path,
1443 e
1444 );
1445 }
1446 }
1447 }
1448
1449 let mut reg = Self {
1450 syntax_set: Arc::new(builder.build()),
1451 user_extensions,
1452 filename_scopes: base.filename_scopes.clone(),
1453 loaded_grammar_paths,
1454 grammar_sources,
1455 aliases: base.aliases.clone(),
1456 catalog: Vec::new(),
1457 catalog_by_name: HashMap::new(),
1458 catalog_by_extension: HashMap::new(),
1459 catalog_by_filename: HashMap::new(),
1460 applied_language_config: HashMap::new(),
1461 catalog_gen: 0,
1462 };
1463 reg.rebuild_catalog();
1464 Some(reg)
1465 }
1466
1467 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1473 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1474
1475 match ext {
1476 "sublime-syntax" => {
1477 let content = std::fs::read_to_string(path)
1478 .map_err(|e| format!("Failed to read file: {}", e))?;
1479 SyntaxDefinition::load_from_str(
1480 &content,
1481 true,
1482 path.file_stem().and_then(|s| s.to_str()),
1483 )
1484 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1485 }
1486 _ => Err(format!(
1487 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1488 ext
1489 )),
1490 }
1491 }
1492}
1493
1494impl Default for GrammarRegistry {
1495 fn default() -> Self {
1496 let defaults = SyntaxSet::load_defaults_newlines();
1498 let mut builder = defaults.into_builder();
1499 Self::add_embedded_grammars(&mut builder);
1500 let syntax_set = builder.build();
1501 let filename_scopes = Self::build_filename_scopes();
1502 let extra_extensions = Self::build_extra_extensions();
1503
1504 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1505 registry.populate_built_in_aliases();
1506 registry.rebuild_catalog();
1507 registry
1508 }
1509}
1510
1511#[derive(Debug, Deserialize)]
1514pub struct PackageManifest {
1515 #[serde(default)]
1516 pub contributes: Option<Contributes>,
1517}
1518
1519#[derive(Debug, Deserialize, Default)]
1520pub struct Contributes {
1521 #[serde(default)]
1522 pub languages: Vec<LanguageContribution>,
1523 #[serde(default)]
1524 pub grammars: Vec<GrammarContribution>,
1525}
1526
1527#[derive(Debug, Deserialize)]
1528pub struct LanguageContribution {
1529 pub id: String,
1530 #[serde(default)]
1531 pub extensions: Vec<String>,
1532}
1533
1534#[derive(Debug, Deserialize)]
1535pub struct GrammarContribution {
1536 pub language: String,
1537 #[serde(rename = "scopeName")]
1538 pub scope_name: String,
1539 pub path: String,
1540}
1541
1542#[cfg(test)]
1543mod tests {
1544 use super::*;
1545
1546 #[test]
1547 fn test_empty_registry() {
1548 let registry = GrammarRegistry::empty();
1549 assert!(!registry.available_syntaxes().is_empty());
1551 }
1552
1553 #[test]
1554 fn test_default_registry() {
1555 let registry = GrammarRegistry::default();
1556 assert!(!registry.available_syntaxes().is_empty());
1558 }
1559
1560 #[test]
1561 fn test_find_syntax_for_common_extensions() {
1562 let registry = GrammarRegistry::default();
1563
1564 let test_cases = [
1571 ("test.py", true),
1572 ("test.rs", true),
1573 ("test.js", false),
1574 ("test.json", true),
1575 ("test.md", true),
1576 ("test.html", true),
1577 ("test.css", true),
1578 ("test.unknown_extension_xyz", false),
1579 ];
1580
1581 for (filename, should_exist) in test_cases {
1582 let path = Path::new(filename);
1583 let result = registry.find_syntax_for_file(path);
1584 assert_eq!(
1585 result.is_some(),
1586 should_exist,
1587 "Expected {:?} for {}",
1588 should_exist,
1589 filename
1590 );
1591 }
1592 }
1593
1594 #[test]
1595 fn test_racket_grammar_loaded() {
1596 let registry = GrammarRegistry::default();
1597 for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1598 let result = registry.find_syntax_for_file(Path::new(filename));
1599 assert!(
1600 result.is_some(),
1601 "Racket grammar should be available for {}",
1602 filename
1603 );
1604 let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1605 assert_eq!(entry.display_name, "Racket", "for {}", filename);
1606 }
1607 }
1608
1609 #[test]
1610 fn test_syntax_set_arc() {
1611 let registry = GrammarRegistry::default();
1612 let arc1 = registry.syntax_set_arc();
1613 let arc2 = registry.syntax_set_arc();
1614 assert!(Arc::ptr_eq(&arc1, &arc2));
1616 }
1617
1618 #[test]
1619 fn test_shell_dotfiles_detection() {
1620 let registry = GrammarRegistry::default();
1621
1622 let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1624
1625 for filename in shell_files {
1626 let path = Path::new(filename);
1627 let result = registry.find_syntax_for_file(path);
1628 assert!(
1629 result.is_some(),
1630 "{} should be detected as a syntax",
1631 filename
1632 );
1633 let syntax = result.unwrap();
1634 assert!(
1636 syntax.name.to_lowercase().contains("bash")
1637 || syntax.name.to_lowercase().contains("shell"),
1638 "{} should be detected as shell/bash, got: {}",
1639 filename,
1640 syntax.name
1641 );
1642 }
1643 }
1644
1645 #[test]
1646 fn test_pkgbuild_detection() {
1647 let registry = GrammarRegistry::default();
1648
1649 for filename in ["PKGBUILD", "APKBUILD"] {
1651 let path = Path::new(filename);
1652 let result = registry.find_syntax_for_file(path);
1653 assert!(
1654 result.is_some(),
1655 "{} should be detected as a syntax",
1656 filename
1657 );
1658 let syntax = result.unwrap();
1659 assert!(
1661 syntax.name.to_lowercase().contains("bash")
1662 || syntax.name.to_lowercase().contains("shell"),
1663 "{} should be detected as shell/bash, got: {}",
1664 filename,
1665 syntax.name
1666 );
1667 }
1668 }
1669
1670 #[test]
1671 fn test_find_syntax_with_glob_filenames() {
1672 let mut registry = GrammarRegistry::default();
1673 let mut languages = std::collections::HashMap::new();
1674 languages.insert(
1675 "shell-configs".to_string(),
1676 crate::config::LanguageConfig {
1677 extensions: vec!["sh".to_string()],
1678 filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1679 grammar: "bash".to_string(),
1680 comment_prefix: Some("#".to_string()),
1681 auto_indent: true,
1682 auto_close: None,
1683 auto_surround: None,
1684 textmate_grammar: None,
1685 show_whitespace_tabs: true,
1686 line_wrap: None,
1687 wrap_column: None,
1688 page_view: None,
1689 page_width: None,
1690 use_tabs: None,
1691 tab_size: None,
1692 formatter: None,
1693 format_on_save: false,
1694 on_save: vec![],
1695 word_characters: None,
1696 },
1697 );
1698 registry.apply_language_config(&languages);
1699
1700 assert!(
1701 registry
1702 .find_by_path(Path::new("nftables.conf"), None)
1703 .is_some(),
1704 "*.conf should match nftables.conf"
1705 );
1706 assert!(
1707 registry.find_by_path(Path::new("lfrc"), None).is_some(),
1708 "*rc should match lfrc"
1709 );
1710 let _ = registry.find_by_path(Path::new("randomfile"), None);
1712 }
1713
1714 #[test]
1715 fn test_find_syntax_with_path_glob_filenames() {
1716 let mut registry = GrammarRegistry::default();
1717 let mut languages = std::collections::HashMap::new();
1718 languages.insert(
1719 "shell-configs".to_string(),
1720 crate::config::LanguageConfig {
1721 extensions: vec!["sh".to_string()],
1722 filenames: vec!["/etc/**/rc.*".to_string()],
1723 grammar: "bash".to_string(),
1724 comment_prefix: Some("#".to_string()),
1725 auto_indent: true,
1726 auto_close: None,
1727 auto_surround: None,
1728 textmate_grammar: None,
1729 show_whitespace_tabs: true,
1730 line_wrap: None,
1731 wrap_column: None,
1732 page_view: None,
1733 page_width: None,
1734 use_tabs: None,
1735 tab_size: None,
1736 formatter: None,
1737 format_on_save: false,
1738 on_save: vec![],
1739 word_characters: None,
1740 },
1741 );
1742 registry.apply_language_config(&languages);
1743
1744 assert!(
1745 registry
1746 .find_by_path(Path::new("/etc/rc.conf"), None)
1747 .is_some(),
1748 "/etc/**/rc.* should match /etc/rc.conf"
1749 );
1750 assert!(
1751 registry
1752 .find_by_path(Path::new("/etc/init/rc.local"), None)
1753 .is_some(),
1754 "/etc/**/rc.* should match /etc/init/rc.local"
1755 );
1756 let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1757 }
1758
1759 #[test]
1760 fn test_exact_filename_takes_priority_over_glob() {
1761 let mut registry = GrammarRegistry::default();
1762 let mut languages = std::collections::HashMap::new();
1763
1764 languages.insert(
1766 "custom-lfrc".to_string(),
1767 crate::config::LanguageConfig {
1768 extensions: vec![],
1769 filenames: vec!["lfrc".to_string()],
1770 grammar: "python".to_string(),
1771 comment_prefix: Some("#".to_string()),
1772 auto_indent: true,
1773 auto_close: None,
1774 auto_surround: None,
1775 textmate_grammar: None,
1776 show_whitespace_tabs: true,
1777 line_wrap: None,
1778 wrap_column: None,
1779 page_view: None,
1780 page_width: None,
1781 use_tabs: None,
1782 tab_size: None,
1783 formatter: None,
1784 format_on_save: false,
1785 on_save: vec![],
1786 word_characters: None,
1787 },
1788 );
1789
1790 languages.insert(
1792 "rc-files".to_string(),
1793 crate::config::LanguageConfig {
1794 extensions: vec![],
1795 filenames: vec!["*rc".to_string()],
1796 grammar: "bash".to_string(),
1797 comment_prefix: Some("#".to_string()),
1798 auto_indent: true,
1799 auto_close: None,
1800 auto_surround: None,
1801 textmate_grammar: None,
1802 show_whitespace_tabs: true,
1803 line_wrap: None,
1804 wrap_column: None,
1805 page_view: None,
1806 page_width: None,
1807 use_tabs: None,
1808 tab_size: None,
1809 formatter: None,
1810 format_on_save: false,
1811 on_save: vec![],
1812 word_characters: None,
1813 },
1814 );
1815
1816 registry.apply_language_config(&languages);
1817
1818 let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1820 assert!(
1821 entry.display_name.to_lowercase().contains("python"),
1822 "exact match should win over glob, got: {}",
1823 entry.display_name
1824 );
1825 }
1826
1827 #[test]
1828 fn test_built_in_aliases_resolve() {
1829 let registry = GrammarRegistry::default();
1830
1831 let syntax = registry.find_syntax_by_name("bash");
1833 assert!(syntax.is_some(), "alias 'bash' should resolve");
1834 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1835
1836 let syntax = registry.find_syntax_by_name("cpp");
1838 assert!(syntax.is_some(), "alias 'cpp' should resolve");
1839 assert_eq!(syntax.unwrap().name, "C++");
1840
1841 let syntax = registry.find_syntax_by_name("csharp");
1843 assert!(syntax.is_some(), "alias 'csharp' should resolve");
1844 assert_eq!(syntax.unwrap().name, "C#");
1845
1846 let syntax = registry.find_syntax_by_name("sh");
1848 assert!(syntax.is_some(), "alias 'sh' should resolve");
1849 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1850
1851 let syntax = registry.find_syntax_by_name("proto");
1853 assert!(syntax.is_some(), "alias 'proto' should resolve");
1854 assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1855 }
1856
1857 #[test]
1858 fn test_alias_case_insensitive_input() {
1859 let registry = GrammarRegistry::default();
1860
1861 let syntax = registry.find_syntax_by_name("BASH");
1863 assert!(
1864 syntax.is_some(),
1865 "alias 'BASH' should resolve case-insensitively"
1866 );
1867 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1868
1869 let syntax = registry.find_syntax_by_name("Cpp");
1870 assert!(
1871 syntax.is_some(),
1872 "alias 'Cpp' should resolve case-insensitively"
1873 );
1874 assert_eq!(syntax.unwrap().name, "C++");
1875 }
1876
1877 #[test]
1878 fn test_full_name_still_works() {
1879 let registry = GrammarRegistry::default();
1880
1881 let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1883 assert!(syntax.is_some(), "full name should still resolve");
1884 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1885
1886 let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1888 assert!(
1889 syntax.is_some(),
1890 "case-insensitive full name should resolve"
1891 );
1892 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1893 }
1894
1895 #[test]
1896 fn test_alias_does_not_shadow_full_names() {
1897 let registry = GrammarRegistry::default();
1898
1899 let syntax = registry.find_syntax_by_name("rust");
1901 assert!(syntax.is_some());
1902 assert_eq!(syntax.unwrap().name, "Rust");
1903
1904 let syntax = registry.find_syntax_by_name("go");
1906 assert!(syntax.is_some());
1907 assert_eq!(syntax.unwrap().name, "Go");
1908 }
1909
1910 #[test]
1911 fn test_register_alias_rejects_collision() {
1912 let mut registry = GrammarRegistry::default();
1913
1914 assert!(registry.register_alias("myalias", "Rust"));
1916 assert!(!registry.register_alias("myalias", "Go"));
1917
1918 assert!(registry.register_alias("myalias", "Rust"));
1920 }
1921
1922 #[test]
1923 fn test_register_alias_rejects_nonexistent_target() {
1924 let mut registry = GrammarRegistry::default();
1925 assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1926 }
1927
1928 #[test]
1929 fn test_register_alias_skips_existing_grammar_name() {
1930 let mut registry = GrammarRegistry::default();
1931
1932 assert!(!registry.register_alias("rust", "Rust"));
1934 assert!(registry.find_syntax_by_name("rust").is_some());
1936 }
1937
1938 #[test]
1939 fn test_available_grammar_info_includes_short_names() {
1940 let registry = GrammarRegistry::default();
1941 let infos = registry.available_grammar_info();
1942
1943 let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1944 assert!(bash_info.is_some(), "bash grammar should be in the list");
1945 let bash_info = bash_info.unwrap();
1946 assert!(
1947 bash_info.short_name.is_some(),
1948 "bash grammar should have a short_name"
1949 );
1950 assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1952 }
1953
1954 #[test]
1955 fn test_catalog_contains_each_language_once() {
1956 let registry = GrammarRegistry::default();
1957 let catalog = registry.catalog();
1958
1959 let mut seen = std::collections::HashSet::new();
1961 for entry in catalog {
1962 let key = entry.display_name.to_lowercase();
1963 assert!(
1964 seen.insert(key.clone()),
1965 "duplicate catalog entry for display_name={:?}",
1966 entry.display_name
1967 );
1968 }
1969
1970 let ts = registry
1973 .find_by_name("TypeScript")
1974 .expect("TypeScript must be in the catalog");
1975 assert!(ts.engines.syntect.is_none());
1976 assert_eq!(
1977 ts.engines.tree_sitter,
1978 Some(fresh_languages::Language::TypeScript)
1979 );
1980 assert_eq!(ts.language_id, "typescript");
1981 assert!(ts.extensions.iter().any(|e| e == "ts"));
1982
1983 for name in ["Rust", "Python"] {
1986 let entry = registry
1987 .find_by_name(name)
1988 .unwrap_or_else(|| panic!("{} must be in the catalog", name));
1989 assert!(
1990 entry.engines.syntect.is_some(),
1991 "{} should have a syntect index",
1992 name
1993 );
1994 assert!(
1995 entry.engines.tree_sitter.is_some(),
1996 "{} should also have a tree-sitter language",
1997 name
1998 );
1999 let by_id = registry
2002 .find_by_name(&entry.language_id)
2003 .expect("language_id should resolve");
2004 assert_eq!(by_id.display_name, entry.display_name);
2005 }
2006
2007 let js = registry
2013 .find_by_name("JavaScript")
2014 .expect("JavaScript must be in the catalog");
2015 assert!(
2016 js.engines.syntect.is_none(),
2017 "JavaScript must not be routed to the syntect engine (issue #899)"
2018 );
2019 assert_eq!(
2020 js.engines.tree_sitter,
2021 Some(fresh_languages::Language::JavaScript),
2022 "JavaScript must carry the tree-sitter language"
2023 );
2024 }
2025
2026 #[test]
2027 fn test_catalog_find_by_path_and_extension() {
2028 let registry = GrammarRegistry::default();
2029 let ts = registry
2030 .find_by_path(Path::new("foo.ts"), None)
2031 .expect("foo.ts should resolve");
2032 assert_eq!(ts.display_name, "TypeScript");
2033 let rs = registry.find_by_extension("rs").expect("rs should resolve");
2034 assert_eq!(rs.display_name, "Rust");
2035 }
2036
2037 fn lang_cfg(
2039 grammar: &str,
2040 extensions: &[&str],
2041 filenames: &[&str],
2042 ) -> crate::config::LanguageConfig {
2043 crate::config::LanguageConfig {
2044 extensions: extensions.iter().map(|s| s.to_string()).collect(),
2045 filenames: filenames.iter().map(|s| s.to_string()).collect(),
2046 grammar: grammar.to_string(),
2047 comment_prefix: None,
2048 auto_indent: true,
2049 auto_close: None,
2050 auto_surround: None,
2051 textmate_grammar: None,
2052 show_whitespace_tabs: true,
2053 line_wrap: None,
2054 wrap_column: None,
2055 page_view: None,
2056 page_width: None,
2057 use_tabs: None,
2058 tab_size: None,
2059 formatter: None,
2060 format_on_save: false,
2061 on_save: vec![],
2062 word_characters: None,
2063 }
2064 }
2065
2066 #[test]
2070 fn test_user_alias_resolves_via_find_by_name() {
2071 let mut registry = GrammarRegistry::default();
2072 let mut languages = std::collections::HashMap::new();
2073 languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2074 registry.apply_language_config(&languages);
2075
2076 let entry = registry
2077 .find_by_name("mylang")
2078 .expect("user-declared alias 'mylang' must resolve");
2079 assert_eq!(entry.display_name, "Rust");
2080 }
2081
2082 #[test]
2086 fn test_register_alias_preserves_applied_language_config() {
2087 let mut registry = GrammarRegistry::default();
2088 let mut languages = std::collections::HashMap::new();
2089 languages.insert(
2090 "shell-configs".to_string(),
2091 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2092 );
2093 registry.apply_language_config(&languages);
2094
2095 assert!(registry.find_by_extension("myconf").is_some());
2097 assert!(
2098 registry
2099 .find_by_path(Path::new("foo.myconf"), None)
2100 .is_some(),
2101 "glob should match before register_alias"
2102 );
2103
2104 registry.register_alias("mycustom", "Rust");
2106
2107 assert!(
2108 registry.find_by_extension("myconf").is_some(),
2109 "config extension must survive register_alias"
2110 );
2111 assert!(
2112 registry
2113 .find_by_path(Path::new("foo.myconf"), None)
2114 .is_some(),
2115 "glob must survive register_alias"
2116 );
2117 }
2118
2119 #[test]
2123 fn test_from_syntax_name_preserves_canonical_display_name() {
2124 use crate::primitives::detected_language::DetectedLanguage;
2125 let registry = GrammarRegistry::default();
2126 let languages = std::collections::HashMap::new();
2127
2128 let detected = DetectedLanguage::from_syntax_name("BASH", ®istry, &languages)
2129 .expect("BASH should resolve via alias");
2130 assert_eq!(
2131 detected.display_name, "Bourne Again Shell (bash)",
2132 "display_name must be canonical, not user-typed"
2133 );
2134 }
2135
2136 #[test]
2140 fn test_config_only_language_appears_in_catalog() {
2141 let mut registry = GrammarRegistry::default();
2142 let mut languages = std::collections::HashMap::new();
2143 languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2145 registry.apply_language_config(&languages);
2146
2147 let entry = registry
2148 .find_by_name("fish")
2149 .expect("fish should be in the catalog after apply_language_config");
2150 assert!(entry.engines.syntect.is_none());
2151 assert!(entry.engines.tree_sitter.is_none());
2152 assert_eq!(entry.language_id, "fish");
2153 assert!(entry.extensions.iter().any(|e| e == "fish"));
2154 }
2155
2156 #[test]
2161 fn test_config_extension_overrides_builtin() {
2162 let mut registry = GrammarRegistry::default();
2163 assert_eq!(
2165 registry.find_by_extension("js").unwrap().display_name,
2166 "JavaScript"
2167 );
2168
2169 let mut languages = std::collections::HashMap::new();
2170 languages.insert(
2171 "ts-overlay".to_string(),
2172 lang_cfg("TypeScript", &["js"], &[]),
2173 );
2174 registry.apply_language_config(&languages);
2175
2176 assert_eq!(
2177 registry.find_by_extension("js").unwrap().display_name,
2178 "TypeScript",
2179 "user-config extension must win over built-in"
2180 );
2181 }
2182
2183 #[test]
2190 fn test_bare_filename_resolves_via_find_by_path() {
2191 let registry = GrammarRegistry::default();
2192 for (filename, expected_substr) in [
2193 ("Gemfile", "ruby"),
2194 ("Rakefile", "ruby"),
2195 ("Vagrantfile", "ruby"),
2196 ("Makefile", "makefile"),
2197 ("GNUmakefile", "makefile"),
2198 ] {
2199 let entry = registry
2200 .find_by_path(Path::new(filename), None)
2201 .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2202 assert!(
2203 entry.display_name.to_lowercase().contains(expected_substr),
2204 "{} should resolve to {} grammar, got {}",
2205 filename,
2206 expected_substr,
2207 entry.display_name
2208 );
2209 }
2210 }
2211
2212 #[test]
2217 fn test_jsx_resolves_to_javascript() {
2218 let registry = GrammarRegistry::default();
2219 let entry = registry
2220 .find_by_path(Path::new("foo.jsx"), None)
2221 .expect("foo.jsx must resolve");
2222 assert_eq!(entry.display_name, "JavaScript");
2223 }
2224
2225 #[test]
2230 fn test_rebuild_catalog_replays_language_config() {
2231 let mut registry = GrammarRegistry::default();
2232 let mut languages = std::collections::HashMap::new();
2233 languages.insert(
2234 "myshell".to_string(),
2235 lang_cfg("bash", &["myext"], &["*.myglob"]),
2236 );
2237 registry.apply_language_config(&languages);
2238 assert!(registry.find_by_extension("myext").is_some());
2239 assert!(registry
2240 .find_by_path(Path::new("foo.myglob"), None)
2241 .is_some());
2242
2243 registry.rebuild_catalog();
2246 assert!(
2247 registry.find_by_extension("myext").is_some(),
2248 "rebuild_catalog must replay applied user config"
2249 );
2250 assert!(
2251 registry
2252 .find_by_path(Path::new("foo.myglob"), None)
2253 .is_some(),
2254 "rebuild_catalog must replay user globs"
2255 );
2256 }
2257
2258 #[test]
2261 fn test_apply_language_config_idempotent() {
2262 let mut registry = GrammarRegistry::default();
2263 let mut languages = std::collections::HashMap::new();
2264 languages.insert(
2265 "shell-cfg".to_string(),
2266 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2267 );
2268
2269 registry.apply_language_config(&languages);
2270 let first_extensions = registry
2271 .find_by_name("bash")
2272 .unwrap()
2273 .extensions
2274 .iter()
2275 .filter(|e| e == &"myconf")
2276 .count();
2277 let first_globs = registry
2278 .find_by_name("bash")
2279 .unwrap()
2280 .filename_globs
2281 .iter()
2282 .filter(|g| g == &"*.myconf")
2283 .count();
2284 assert_eq!(first_extensions, 1);
2285 assert_eq!(first_globs, 1);
2286
2287 registry.apply_language_config(&languages);
2289 let second_extensions = registry
2290 .find_by_name("bash")
2291 .unwrap()
2292 .extensions
2293 .iter()
2294 .filter(|e| e == &"myconf")
2295 .count();
2296 let second_globs = registry
2297 .find_by_name("bash")
2298 .unwrap()
2299 .filename_globs
2300 .iter()
2301 .filter(|g| g == &"*.myconf")
2302 .count();
2303 assert_eq!(second_extensions, 1, "extensions must not duplicate");
2304 assert_eq!(second_globs, 1, "globs must not duplicate");
2305 }
2306
2307 #[test]
2311 fn test_tree_sitter_bridge() {
2312 assert_eq!(
2313 tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2314 Some(fresh_languages::Language::Bash)
2315 );
2316 assert_eq!(
2317 tree_sitter_for_syntect_name("Rust"),
2318 Some(fresh_languages::Language::Rust)
2319 );
2320 assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2322 assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2324 }
2325}