1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file that should be loaded into a registry.
#[derive(Debug, Clone)]
pub struct GrammarSpec {
    /// Language name the grammar is being registered for.
    pub language: String,
    /// Path handed to the grammar loader.
    pub path: PathBuf,
    /// File extensions that should resolve to the loaded grammar's scope.
    pub extensions: Vec<String>,
}
30
31#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
33#[serde(tag = "type")]
34pub enum GrammarSource {
35 #[serde(rename = "built-in")]
37 BuiltIn,
38 #[serde(rename = "user")]
40 User { path: PathBuf },
41 #[serde(rename = "language-pack")]
43 LanguagePack { name: String, path: PathBuf },
44 #[serde(rename = "bundle")]
46 Bundle { name: String, path: PathBuf },
47 #[serde(rename = "plugin")]
49 Plugin { plugin: String, path: PathBuf },
50}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
64#[derive(Clone, Debug, Serialize, Deserialize)]
66pub struct GrammarInfo {
67 pub name: String,
69 pub source: GrammarSource,
71 pub file_extensions: Vec<String>,
73 #[serde(default, skip_serializing_if = "Option::is_none")]
75 pub short_name: Option<String>,
76}
77
78const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
86 &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
102#[derive(Clone, Debug, Default)]
107pub struct GrammarEngines {
108 pub syntect: Option<usize>,
111 pub tree_sitter: Option<fresh_languages::Language>,
113}
114
115#[derive(Clone, Debug)]
124pub struct GrammarEntry {
125 pub display_name: String,
127 pub language_id: String,
129 pub short_name: Option<String>,
131 pub extensions: Vec<String>,
133 pub filenames: Vec<String>,
135 pub filename_globs: Vec<String>,
137 pub source: GrammarSource,
139 pub engines: GrammarEngines,
141}
142
143pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");
145
146pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");
149
150pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");
152
153pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");
155
156pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");
158
159pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");
161
162pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");
164
165pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");
167
168pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");
170
171pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
173pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
175pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
177pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
179pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
181pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
183pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
185pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
187pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
189pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
191pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
193pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
195pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
197pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
199pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
201pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
203pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
205pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
207pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
209pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
211pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
213pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
215pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
217pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
219pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
221pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
223pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
225pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
227pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
229pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
231pub const AUTOHOTKEY_GRAMMAR: &str =
234 include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
235pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
237
238impl std::fmt::Debug for GrammarRegistry {
243 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244 f.debug_struct("GrammarRegistry")
245 .field("syntax_count", &self.syntax_set.syntaxes().len())
246 .finish()
247 }
248}
249
250pub struct GrammarRegistry {
251 syntax_set: Arc<SyntaxSet>,
253 user_extensions: HashMap<String, String>,
255 filename_scopes: HashMap<String, String>,
257 loaded_grammar_paths: Vec<GrammarSpec>,
259 grammar_sources: HashMap<String, GrammarInfo>,
261 aliases: HashMap<String, String>,
265 catalog: Vec<GrammarEntry>,
269 catalog_by_name: HashMap<String, usize>,
272 catalog_by_extension: HashMap<String, usize>,
274 catalog_by_filename: HashMap<String, usize>,
276 applied_language_config: HashMap<String, crate::config::LanguageConfig>,
281 catalog_gen: u64,
285}
286
287impl GrammarRegistry {
288 pub(crate) fn new(
293 syntax_set: SyntaxSet,
294 user_extensions: HashMap<String, String>,
295 filename_scopes: HashMap<String, String>,
296 ) -> Self {
297 Self::new_with_loaded_paths(
298 syntax_set,
299 user_extensions,
300 filename_scopes,
301 Vec::new(),
302 HashMap::new(),
303 )
304 }
305
306 pub(crate) fn new_with_loaded_paths(
311 syntax_set: SyntaxSet,
312 user_extensions: HashMap<String, String>,
313 filename_scopes: HashMap<String, String>,
314 loaded_grammar_paths: Vec<GrammarSpec>,
315 grammar_sources: HashMap<String, GrammarInfo>,
316 ) -> Self {
317 let mut reg = Self {
318 syntax_set: Arc::new(syntax_set),
319 user_extensions,
320 filename_scopes,
321 loaded_grammar_paths,
322 grammar_sources,
323 aliases: HashMap::new(),
324 catalog: Vec::new(),
325 catalog_by_name: HashMap::new(),
326 catalog_by_extension: HashMap::new(),
327 catalog_by_filename: HashMap::new(),
328 applied_language_config: HashMap::new(),
329 catalog_gen: 0,
330 };
331 reg.rebuild_catalog();
332 reg
333 }
334
335 pub fn empty() -> Arc<Self> {
337 let mut builder = SyntaxSetBuilder::new();
338 builder.add_plain_text_syntax();
339 let mut reg = Self {
340 syntax_set: Arc::new(builder.build()),
341 user_extensions: HashMap::new(),
342 filename_scopes: HashMap::new(),
343 loaded_grammar_paths: Vec::new(),
344 grammar_sources: HashMap::new(),
345 aliases: HashMap::new(),
346 catalog: Vec::new(),
347 catalog_by_name: HashMap::new(),
348 catalog_by_extension: HashMap::new(),
349 catalog_by_filename: HashMap::new(),
350 applied_language_config: HashMap::new(),
351 catalog_gen: 0,
352 };
353 reg.rebuild_catalog();
354 Arc::new(reg)
355 }
356
357 pub fn defaults_only() -> Arc<Self> {
364 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
368 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
369 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
370 ))
371 .expect("Failed to load pre-compiled syntax packdump");
372 tracing::info!(
373 "defaults_only: loaded ({} syntaxes)",
374 syntax_set.syntaxes().len()
375 );
376 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
377 let filename_scopes = Self::build_filename_scopes();
378 let extra_extensions = Self::build_extra_extensions();
379 let mut registry = Self {
380 syntax_set: Arc::new(syntax_set),
381 user_extensions: extra_extensions,
382 filename_scopes,
383 loaded_grammar_paths: Vec::new(),
384 grammar_sources,
385 aliases: HashMap::new(),
386 catalog: Vec::new(),
387 catalog_by_name: HashMap::new(),
388 catalog_by_extension: HashMap::new(),
389 catalog_by_filename: HashMap::new(),
390 applied_language_config: HashMap::new(),
391 catalog_gen: 0,
392 };
393 registry.populate_built_in_aliases();
394 registry.rebuild_catalog();
395 Arc::new(registry)
396 }
397
/// Returns the extra extension → scope mappings that supplement the syntax
/// set: `cjs` and `mjs` both map to the JavaScript scope.
pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
    const JS_SCOPE: &str = "source.js";

    let mut extra = HashMap::new();
    for ext in ["cjs", "mjs"] {
        extra.insert(ext.to_string(), JS_SCOPE.to_string());
    }
    extra
}
415
/// Returns the built-in exact-filename → scope mappings (shell dotfiles, git
/// files, Dockerfiles, build files, ...).
///
/// Keys are matched verbatim, so casing matters (`justfile` and `Justfile`
/// are listed separately).
pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
    // One row per scope, listing every filename that maps to it.
    const SCOPE_TABLE: &[(&str, &[&str])] = &[
        (
            "source.shell.bash",
            &[
                ".zshrc",
                ".zprofile",
                ".zshenv",
                ".zlogin",
                ".zlogout",
                ".bash_aliases",
                "PKGBUILD",
                "APKBUILD",
            ],
        ),
        ("source.git-rebase-todo", &["git-rebase-todo"]),
        (
            "source.git-commit",
            &["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"],
        ),
        (
            "source.gitignore",
            &[".gitignore", ".dockerignore", ".npmignore", ".hgignore"],
        ),
        ("source.gitconfig", &[".gitconfig", ".gitmodules"]),
        ("source.gitattributes", &[".gitattributes"]),
        ("source.groovy", &["Jenkinsfile"]),
        ("source.ruby", &["Brewfile"]),
        (
            "source.dockerfile",
            &[
                "Dockerfile",
                "Containerfile",
                "Dockerfile.dev",
                "Dockerfile.prod",
                "Dockerfile.test",
                "Dockerfile.build",
            ],
        ),
        ("source.cmake", &["CMakeLists.txt"]),
        (
            "source.starlark",
            &[
                "BUILD",
                "BUILD.bazel",
                "WORKSPACE",
                "WORKSPACE.bazel",
                "Tiltfile",
            ],
        ),
        ("source.justfile", &["justfile", "Justfile", ".justfile"]),
        ("source.ini", &[".editorconfig"]),
        ("source.earthfile", &["Earthfile"]),
        (
            "source.hyprlang",
            &["hyprland.conf", "hyprpaper.conf", "hyprlock.conf"],
        ),
        ("source.gomod", &["go.mod", "go.sum"]),
    ];

    let mut scopes = HashMap::new();
    for &(scope, filenames) in SCOPE_TABLE {
        for &filename in filenames {
            scopes.insert(filename.to_string(), scope.to_string());
        }
    }
    scopes
}
521
522 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
524 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
526 Ok(syntax) => {
527 builder.add(syntax);
528 tracing::debug!("Loaded embedded TOML grammar");
529 }
530 Err(e) => {
531 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
532 }
533 }
534
535 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
537 Ok(syntax) => {
538 builder.add(syntax);
539 tracing::debug!("Loaded embedded Odin grammar");
540 }
541 Err(e) => {
542 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
543 }
544 }
545
546 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
548 Ok(syntax) => {
549 builder.add(syntax);
550 tracing::debug!("Loaded embedded Zig grammar");
551 }
552 Err(e) => {
553 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
554 }
555 }
556
557 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
559 Ok(syntax) => {
560 builder.add(syntax);
561 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
562 }
563 Err(e) => {
564 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
565 }
566 }
567
568 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
570 {
571 Ok(syntax) => {
572 builder.add(syntax);
573 tracing::debug!("Loaded embedded Git Commit Message grammar");
574 }
575 Err(e) => {
576 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
577 }
578 }
579
580 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
582 Ok(syntax) => {
583 builder.add(syntax);
584 tracing::debug!("Loaded embedded Gitignore grammar");
585 }
586 Err(e) => {
587 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
588 }
589 }
590
591 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
593 Ok(syntax) => {
594 builder.add(syntax);
595 tracing::debug!("Loaded embedded Git Config grammar");
596 }
597 Err(e) => {
598 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
599 }
600 }
601
602 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
604 Ok(syntax) => {
605 builder.add(syntax);
606 tracing::debug!("Loaded embedded Git Attributes grammar");
607 }
608 Err(e) => {
609 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
610 }
611 }
612
613 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
615 Ok(syntax) => {
616 builder.add(syntax);
617 tracing::debug!("Loaded embedded Typst grammar");
618 }
619 Err(e) => {
620 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
621 }
622 }
623
624 let additional_grammars: &[(&str, &str)] = &[
626 (DOCKERFILE_GRAMMAR, "Dockerfile"),
627 (INI_GRAMMAR, "INI"),
628 (CMAKE_GRAMMAR, "CMake"),
629 (SCSS_GRAMMAR, "SCSS"),
630 (LESS_GRAMMAR, "LESS"),
631 (POWERSHELL_GRAMMAR, "PowerShell"),
632 (KOTLIN_GRAMMAR, "Kotlin"),
633 (SWIFT_GRAMMAR, "Swift"),
634 (DART_GRAMMAR, "Dart"),
635 (ELIXIR_GRAMMAR, "Elixir"),
636 (FSHARP_GRAMMAR, "FSharp"),
637 (NIX_GRAMMAR, "Nix"),
638 (HCL_GRAMMAR, "HCL"),
639 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
640 (GRAPHQL_GRAMMAR, "GraphQL"),
641 (JULIA_GRAMMAR, "Julia"),
642 (NIM_GRAMMAR, "Nim"),
643 (GLEAM_GRAMMAR, "Gleam"),
644 (VLANG_GRAMMAR, "V"),
645 (SOLIDITY_GRAMMAR, "Solidity"),
646 (KDL_GRAMMAR, "KDL"),
647 (NUSHELL_GRAMMAR, "Nushell"),
648 (STARLARK_GRAMMAR, "Starlark"),
649 (JUSTFILE_GRAMMAR, "Justfile"),
650 (EARTHFILE_GRAMMAR, "Earthfile"),
651 (GOMOD_GRAMMAR, "Go Module"),
652 (VUE_GRAMMAR, "Vue"),
653 (SVELTE_GRAMMAR, "Svelte"),
654 (ASTRO_GRAMMAR, "Astro"),
655 (HYPRLANG_GRAMMAR, "Hyprlang"),
656 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
657 (RACKET_GRAMMAR, "Racket"),
658 ];
659
660 for (grammar_str, name) in additional_grammars {
661 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
662 Ok(syntax) => {
663 builder.add(syntax);
664 tracing::debug!("Loaded embedded {} grammar", name);
665 }
666 Err(e) => {
667 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
668 }
669 }
670 }
671 }
672
673 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
679 let entry = self.find_by_path(path, None)?;
680 entry
681 .engines
682 .syntect
683 .map(|i| &self.syntax_set.syntaxes()[i])
684 }
685
686 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
694 if let Some(entry) = self.find_by_name(name) {
695 if let Some(idx) = entry.engines.syntect {
696 return Some(&self.syntax_set.syntaxes()[idx]);
697 }
698 }
699 self.syntax_set.find_syntax_by_name(name)
703 }
704
/// Returns the built-in `(alias, full grammar name)` pairs, in registration
/// order.
fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
    const BUILT_IN: &[(&str, &str)] = &[
        ("bash", "Bourne Again Shell (bash)"),
        ("shell", "Bourne Again Shell (bash)"),
        ("sh", "Bourne Again Shell (bash)"),
        ("c++", "C++"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("objc", "Objective-C"),
        ("objcpp", "Objective-C++"),
        ("regex", "Regular Expressions (Python)"),
        ("regexp", "Regular Expressions (Python)"),
        ("proto", "Protocol Buffers"),
        ("protobuf", "Protocol Buffers"),
        ("gomod", "Go Module"),
        ("git-rebase", "Git Rebase Todo"),
        ("git-commit", "Git Commit Message"),
        ("git-config", "Git Config"),
        ("git-attributes", "Git Attributes"),
        ("gitignore", "Gitignore"),
        ("fsharp", "FSharp"),
        ("f#", "FSharp"),
        ("terraform", "HCL"),
        ("tf", "HCL"),
        ("ts", "TypeScript"),
        ("js", "JavaScript"),
        ("py", "Python"),
        ("rb", "Ruby"),
        ("rs", "Rust"),
        ("md", "Markdown"),
        ("yml", "YAML"),
        ("dockerfile", "Dockerfile"),
    ];
    BUILT_IN.to_vec()
}
749
750 pub(crate) fn populate_built_in_aliases(&mut self) {
757 for (short, full) in Self::built_in_aliases() {
758 self.register_alias_inner(short, full, true);
759 }
760 self.rebuild_catalog();
761 }
762
763 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
773 if !self.register_alias_inner(short_name, full_name, false) {
774 return false;
775 }
776 let short_lower = short_name.to_lowercase();
777 let full_lower = full_name.to_lowercase();
778 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
779 self.catalog_by_name
780 .entry(short_lower.clone())
781 .or_insert(idx);
782 let entry = &mut self.catalog[idx];
783 let replace = match &entry.short_name {
784 None => true,
785 Some(existing) => short_name.len() < existing.len(),
786 };
787 if replace {
788 entry.short_name = Some(short_lower);
789 }
790 }
791 true
792 }
793
794 fn register_alias_inner(
795 &mut self,
796 short_name: &str,
797 full_name: &str,
798 is_built_in: bool,
799 ) -> bool {
800 let short_lower = short_name.to_lowercase();
801
802 let target_exists = self
804 .syntax_set
805 .syntaxes()
806 .iter()
807 .any(|s| s.name.eq_ignore_ascii_case(full_name));
808 if !target_exists {
809 if tree_sitter_for_syntect_name(full_name).is_some() {
813 return false;
814 }
815 if is_built_in {
816 tracing::warn!(
819 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
820 short_name, full_name
821 );
822 } else {
823 tracing::warn!(
824 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
825 short_name,
826 full_name
827 );
828 }
829 return false;
830 }
831
832 let collides_with_full_name = self
834 .syntax_set
835 .syntaxes()
836 .iter()
837 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
838 if collides_with_full_name {
839 tracing::debug!(
843 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
844 short_name
845 );
846 return false;
847 }
848
849 if let Some(existing_target) = self.aliases.get(&short_lower) {
851 if existing_target.eq_ignore_ascii_case(full_name) {
852 return true;
854 }
855 let msg = format!(
856 "Alias '{}' already maps to '{}', cannot remap to '{}'",
857 short_name, existing_target, full_name
858 );
859 if is_built_in {
860 panic!("[grammar-alias] Built-in alias collision: {}", msg);
861 } else {
862 tracing::warn!("[grammar-alias] {}", msg);
863 return false;
864 }
865 }
866
867 let exact_name = self
869 .syntax_set
870 .syntaxes()
871 .iter()
872 .find(|s| s.name.eq_ignore_ascii_case(full_name))
873 .map(|s| s.name.clone())
874 .unwrap();
875
876 self.aliases.insert(short_lower, exact_name);
877 true
878 }
879
880 pub(crate) fn rebuild_catalog(&mut self) {
895 let mut short_by_full: HashMap<String, String> = HashMap::new();
902 let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
903 let key = full.to_lowercase();
904 let keep = match map.get(&key) {
905 None => true,
906 Some(existing) => short.len() < existing.len(),
907 };
908 if keep {
909 map.insert(key, short.to_string());
910 }
911 };
912 for (short, full) in Self::built_in_aliases() {
913 record(&mut short_by_full, short, full);
914 }
915 for (short, full) in &self.aliases {
916 record(&mut short_by_full, short, full);
917 }
918
919 let derive_language_id =
920 |display_name: &str| -> (String, Option<fresh_languages::Language>) {
921 let ts = tree_sitter_for_syntect_name(display_name);
922 let id = ts
923 .map(|l| l.id().to_string())
924 .unwrap_or_else(|| display_name.to_lowercase());
925 (id, ts)
926 };
927
928 let mut catalog: Vec<GrammarEntry> = Vec::new();
929 let mut scope_to_index: HashMap<String, usize> = HashMap::new();
930
931 for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
941 if syntax.name == "Plain Text" {
942 continue;
943 }
944 let (language_id, tree_sitter) = derive_language_id(&syntax.name);
945 let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
946 let source = self
947 .grammar_sources
948 .get(&syntax.name)
949 .map(|info| info.source.clone())
950 .unwrap_or(GrammarSource::BuiltIn);
951 let entry_index = catalog.len();
952 scope_to_index.insert(syntax.scope.to_string(), entry_index);
953
954 let mut extensions = syntax.file_extensions.clone();
960 if let Some(lang) = tree_sitter {
961 for ext in lang.extensions() {
962 let ext = ext.to_string();
963 if !extensions.iter().any(|e| e == &ext) {
964 extensions.push(ext);
965 }
966 }
967 }
968
969 catalog.push(GrammarEntry {
970 display_name: syntax.name.clone(),
971 language_id,
972 short_name,
973 extensions,
974 filenames: Vec::new(),
975 filename_globs: Vec::new(),
976 source,
977 engines: GrammarEngines {
978 syntect: Some(idx),
979 tree_sitter,
980 },
981 });
982 }
983
984 for (filename, scope) in &self.filename_scopes {
986 if let Some(&idx) = scope_to_index.get(scope) {
987 if !catalog[idx].filenames.iter().any(|f| f == filename) {
988 catalog[idx].filenames.push(filename.clone());
989 }
990 }
991 }
992
993 for (ext, scope) in &self.user_extensions {
995 if let Some(&idx) = scope_to_index.get(scope) {
996 if !catalog[idx].extensions.iter().any(|e| e == ext) {
997 catalog[idx].extensions.push(ext.clone());
998 }
999 }
1000 }
1001
1002 let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1007 std::collections::HashSet::new();
1008 for entry in &catalog {
1009 if let Some(lang) = entry.engines.tree_sitter {
1010 ts_covered.insert(lang);
1011 }
1012 }
1013 for lang in fresh_languages::Language::all() {
1014 if ts_covered.contains(lang) {
1015 continue;
1016 }
1017 let display_name = lang.display_name().to_string();
1018 let language_id = lang.id().to_string();
1019 let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1020 let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1021 catalog.push(GrammarEntry {
1022 display_name,
1023 language_id,
1024 short_name,
1025 extensions,
1026 filenames: Vec::new(),
1027 filename_globs: Vec::new(),
1028 source: GrammarSource::BuiltIn,
1029 engines: GrammarEngines {
1030 syntect: None,
1031 tree_sitter: Some(*lang),
1032 },
1033 });
1034 }
1035
1036 let mut by_name: HashMap<String, usize> = HashMap::new();
1044 let mut by_extension: HashMap<String, usize> = HashMap::new();
1045 let mut by_filename: HashMap<String, usize> = HashMap::new();
1046 for (idx, entry) in catalog.iter().enumerate() {
1047 by_name.insert(entry.display_name.to_lowercase(), idx);
1048 by_name.insert(entry.language_id.to_lowercase(), idx);
1049 if let Some(short) = &entry.short_name {
1050 by_name.insert(short.to_lowercase(), idx);
1051 }
1052 for ext in &entry.extensions {
1053 by_extension.entry(ext.to_lowercase()).or_insert(idx);
1054 by_filename.entry(ext.clone()).or_insert(idx);
1055 }
1056 for filename in &entry.filenames {
1057 by_filename.entry(filename.clone()).or_insert(idx);
1058 }
1059 }
1060
1061 self.catalog = catalog;
1062 self.catalog_by_name = by_name;
1063 self.catalog_by_extension = by_extension;
1064 self.catalog_by_filename = by_filename;
1065
1066 if !self.applied_language_config.is_empty() {
1070 let cfg = std::mem::take(&mut self.applied_language_config);
1071 self.apply_language_config_inner(&cfg);
1072 self.applied_language_config = cfg;
1073 }
1074 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1075 }
1076
1077 pub fn catalog(&self) -> &[GrammarEntry] {
1079 &self.catalog
1080 }
1081
1082 pub fn catalog_gen(&self) -> u64 {
1086 self.catalog_gen
1087 }
1088
1089 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1095 self.catalog_by_name
1096 .get(&name.to_lowercase())
1097 .map(|&idx| &self.catalog[idx])
1098 }
1099
1100 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1121 let filename = path.file_name().and_then(|n| n.to_str());
1122 let path_str = path.to_str().unwrap_or("");
1123
1124 if let Some(name) = filename {
1125 if let Some(&idx) = self.catalog_by_filename.get(name) {
1126 return Some(&self.catalog[idx]);
1127 }
1128 }
1129
1130 if let Some(name) = filename {
1132 for entry in &self.catalog {
1133 for pattern in &entry.filename_globs {
1134 let matched = if is_path_pattern(pattern) {
1135 path_glob_matches(pattern, path_str)
1136 } else {
1137 filename_glob_matches(pattern, name)
1138 };
1139 if matched {
1140 return Some(entry);
1141 }
1142 }
1143 }
1144 }
1145
1146 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1147 if let Some(entry) = self.find_by_extension(ext) {
1148 return Some(entry);
1149 }
1150 }
1151
1152 let line = first_line?;
1157 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1158 self.find_by_name(&syntax.name)
1159 }
1160
1161 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1163 self.catalog_by_extension
1164 .get(&ext.to_lowercase())
1165 .map(|&idx| &self.catalog[idx])
1166 }
1167
1168 pub fn apply_language_config(
1181 &mut self,
1182 languages: &HashMap<String, crate::config::LanguageConfig>,
1183 ) {
1184 self.applied_language_config = languages.clone();
1185 self.apply_language_config_inner(languages);
1186 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1187 }
1188
1189 fn apply_language_config_inner(
1194 &mut self,
1195 languages: &HashMap<String, crate::config::LanguageConfig>,
1196 ) {
1197 for (lang_id, lang_cfg) in languages {
1198 let grammar_name = if lang_cfg.grammar.is_empty() {
1199 lang_id.as_str()
1200 } else {
1201 lang_cfg.grammar.as_str()
1202 };
1203
1204 let idx = self
1206 .catalog_by_name
1207 .get(&grammar_name.to_lowercase())
1208 .copied()
1209 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1210 .unwrap_or_else(|| {
1211 let idx = self.catalog.len();
1212 self.catalog.push(GrammarEntry {
1213 display_name: lang_id.clone(),
1214 language_id: lang_id.clone(),
1215 short_name: None,
1216 extensions: Vec::new(),
1217 filenames: Vec::new(),
1218 filename_globs: Vec::new(),
1219 source: GrammarSource::BuiltIn,
1220 engines: GrammarEngines::default(),
1221 });
1222 idx
1223 });
1224
1225 self.catalog_by_name
1230 .entry(lang_id.to_lowercase())
1231 .or_insert(idx);
1232
1233 for ext in &lang_cfg.extensions {
1234 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1235 self.catalog[idx].extensions.push(ext.clone());
1236 }
1237 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1239 }
1240 for filename in &lang_cfg.filenames {
1241 if is_glob_pattern(filename) {
1242 if !self.catalog[idx]
1243 .filename_globs
1244 .iter()
1245 .any(|f| f == filename)
1246 {
1247 self.catalog[idx].filename_globs.push(filename.clone());
1248 }
1249 } else {
1250 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1251 self.catalog[idx].filenames.push(filename.clone());
1252 }
1253 self.catalog_by_filename.insert(filename.clone(), idx);
1254 }
1255 }
1256 }
1257 }
1258
1259 pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1261 &self.syntax_set
1262 }
1263
1264 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1266 Arc::clone(&self.syntax_set)
1267 }
1268
1269 pub fn available_syntaxes(&self) -> Vec<&str> {
1271 self.syntax_set
1272 .syntaxes()
1273 .iter()
1274 .map(|s| s.name.as_str())
1275 .collect()
1276 }
1277
1278 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1285 let mut result: Vec<GrammarInfo> = self
1286 .catalog
1287 .iter()
1288 .map(|entry| GrammarInfo {
1289 name: entry.display_name.clone(),
1290 source: entry.source.clone(),
1291 file_extensions: entry.extensions.clone(),
1292 short_name: entry.short_name.clone(),
1293 })
1294 .collect();
1295 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1296 result
1297 }
1298
1299 pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1301 &self.grammar_sources
1302 }
1303
1304 pub(crate) fn build_grammar_sources_from_syntax_set(
1308 syntax_set: &SyntaxSet,
1309 ) -> HashMap<String, GrammarInfo> {
1310 let mut sources = HashMap::new();
1311 for syntax in syntax_set.syntaxes() {
1312 sources.insert(
1313 syntax.name.clone(),
1314 GrammarInfo {
1315 name: syntax.name.clone(),
1316 source: GrammarSource::BuiltIn,
1317 file_extensions: syntax.file_extensions.clone(),
1318 short_name: None,
1319 },
1320 );
1321 }
1322 sources
1323 }
1324
/// Test-only access to the extra extension → scope map.
#[cfg(test)]
pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
    let extensions = &self.user_extensions;
    extensions
}
1330
/// Test-only access to the specs of additionally loaded grammars.
#[cfg(test)]
pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
    self.loaded_grammar_paths.as_slice()
}
1336
1337 pub fn with_additional_grammars(
1351 base: &GrammarRegistry,
1352 additional: &[GrammarSpec],
1353 ) -> Option<Self> {
1354 tracing::info!(
1355 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1356 additional.len(),
1357 base.syntax_set.syntaxes().len()
1358 );
1359
1360 let mut builder = (*base.syntax_set).clone().into_builder();
1364
1365 let mut user_extensions = base.user_extensions.clone();
1367
1368 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1370
1371 let mut grammar_sources = base.grammar_sources.clone();
1373
1374 for spec in additional {
1376 tracing::info!(
1377 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1378 spec.language,
1379 spec.path,
1380 spec.extensions
1381 );
1382 match Self::load_grammar_file(&spec.path) {
1383 Ok(syntax) => {
1384 let scope = syntax.scope.to_string();
1385 let syntax_name = syntax.name.clone();
1386 tracing::info!(
1387 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1388 syntax_name,
1389 scope
1390 );
1391 builder.add(syntax);
1392 tracing::info!(
1393 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1394 spec.language,
1395 spec.path,
1396 spec.extensions
1397 );
1398 for ext in &spec.extensions {
1400 user_extensions.insert(ext.clone(), scope.clone());
1401 }
1402 grammar_sources.insert(
1404 syntax_name.clone(),
1405 GrammarInfo {
1406 name: syntax_name,
1407 source: GrammarSource::Plugin {
1408 plugin: spec.language.clone(),
1409 path: spec.path.clone(),
1410 },
1411 file_extensions: spec.extensions.clone(),
1412 short_name: None,
1413 },
1414 );
1415 loaded_grammar_paths.push(spec.clone());
1417 }
1418 Err(e) => {
1419 tracing::warn!(
1420 "Failed to load grammar for '{}' from {:?}: {}",
1421 spec.language,
1422 spec.path,
1423 e
1424 );
1425 }
1426 }
1427 }
1428
1429 let mut reg = Self {
1430 syntax_set: Arc::new(builder.build()),
1431 user_extensions,
1432 filename_scopes: base.filename_scopes.clone(),
1433 loaded_grammar_paths,
1434 grammar_sources,
1435 aliases: base.aliases.clone(),
1436 catalog: Vec::new(),
1437 catalog_by_name: HashMap::new(),
1438 catalog_by_extension: HashMap::new(),
1439 catalog_by_filename: HashMap::new(),
1440 applied_language_config: HashMap::new(),
1441 catalog_gen: 0,
1442 };
1443 reg.rebuild_catalog();
1444 Some(reg)
1445 }
1446
1447 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1453 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1454
1455 match ext {
1456 "sublime-syntax" => {
1457 let content = std::fs::read_to_string(path)
1458 .map_err(|e| format!("Failed to read file: {}", e))?;
1459 SyntaxDefinition::load_from_str(
1460 &content,
1461 true,
1462 path.file_stem().and_then(|s| s.to_str()),
1463 )
1464 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1465 }
1466 _ => Err(format!(
1467 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1468 ext
1469 )),
1470 }
1471 }
1472}
1473
1474impl Default for GrammarRegistry {
1475 fn default() -> Self {
1476 let defaults = SyntaxSet::load_defaults_newlines();
1478 let mut builder = defaults.into_builder();
1479 Self::add_embedded_grammars(&mut builder);
1480 let syntax_set = builder.build();
1481 let filename_scopes = Self::build_filename_scopes();
1482 let extra_extensions = Self::build_extra_extensions();
1483
1484 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1485 registry.populate_built_in_aliases();
1486 registry.rebuild_catalog();
1487 registry
1488 }
1489}
1490
/// Deserialized package manifest; only the `contributes` section is modelled.
///
/// NOTE(review): the field names look like the VS Code extension
/// `package.json` layout — confirm against the loader that parses this.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// Optional `contributes` section; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1498
1499#[derive(Debug, Deserialize, Default)]
1500pub struct Contributes {
1501 #[serde(default)]
1502 pub languages: Vec<LanguageContribution>,
1503 #[serde(default)]
1504 pub grammars: Vec<GrammarContribution>,
1505}
1506
/// One language declaration inside [`Contributes::languages`].
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required in the manifest).
    pub id: String,
    /// Extensions listed for the language; empty when the key is absent.
    // NOTE(review): presumably includes the leading dot as in VS Code manifests — confirm.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1513
/// One grammar declaration inside [`Contributes::grammars`].
///
/// All three fields are required when deserializing.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language name this grammar is declared for.
    // NOTE(review): presumably matches a `LanguageContribution::id` — confirm.
    pub language: String,
    /// Scope name, read from the manifest's `scopeName` key.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file, kept as the raw manifest string.
    // NOTE(review): presumably relative to the manifest's directory — confirm.
    pub path: String,
}
1521
1522#[cfg(test)]
1523mod tests {
1524 use super::*;
1525
/// `GrammarRegistry::empty()` must still report at least one syntax.
#[test]
fn test_empty_registry() {
    assert!(!GrammarRegistry::empty().available_syntaxes().is_empty());
}
1532
/// The default registry must report at least one syntax.
#[test]
fn test_default_registry() {
    assert!(!GrammarRegistry::default().available_syntaxes().is_empty());
}
1539
/// Common extensions must resolve to a syntax; an unknown extension must not.
#[test]
fn test_find_syntax_for_common_extensions() {
    // (file name, whether a syntax is expected to be found)
    const CASES: [(&str, bool); 8] = [
        ("test.py", true),
        ("test.rs", true),
        ("test.js", true),
        ("test.json", true),
        ("test.md", true),
        ("test.html", true),
        ("test.css", true),
        ("test.unknown_extension_xyz", false),
    ];

    let registry = GrammarRegistry::default();
    for (filename, should_exist) in CASES {
        let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
        assert_eq!(
            found, should_exist,
            "Expected {:?} for {}",
            should_exist, filename
        );
    }
}
1568
/// Each Racket-family extension must resolve to a syntax and to the catalog
/// entry whose display name is "Racket".
#[test]
fn test_racket_grammar_loaded() {
    let registry = GrammarRegistry::default();
    for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
        let path = Path::new(filename);
        assert!(
            registry.find_syntax_for_file(path).is_some(),
            "Racket grammar should be available for {}",
            filename
        );
        let entry = registry.find_by_path(path, None).unwrap();
        assert_eq!(entry.display_name, "Racket", "for {}", filename);
    }
}
1583
/// Two calls to `syntax_set_arc` must hand out handles to the same allocation.
#[test]
fn test_syntax_set_arc() {
    let registry = GrammarRegistry::default();
    let first = registry.syntax_set_arc();
    let second = registry.syntax_set_arc();
    assert!(Arc::ptr_eq(&first, &second));
}
1592
/// Shell dotfiles must resolve to a syntax whose name mentions bash or shell.
#[test]
fn test_shell_dotfiles_detection() {
    let registry = GrammarRegistry::default();

    for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));
        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
1619
/// `PKGBUILD` and `APKBUILD` must resolve to a syntax whose name mentions
/// bash or shell.
#[test]
fn test_pkgbuild_detection() {
    let registry = GrammarRegistry::default();

    for filename in ["PKGBUILD", "APKBUILD"] {
        let syntax = registry
            .find_syntax_for_file(Path::new(filename))
            .unwrap_or_else(|| panic!("{} should be detected as a syntax", filename));
        let lowered = syntax.name.to_lowercase();
        assert!(
            lowered.contains("bash") || lowered.contains("shell"),
            "{} should be detected as shell/bash, got: {}",
            filename,
            syntax.name
        );
    }
}
1644
/// Glob patterns listed under a language's `filenames` must be honoured by
/// `find_by_path`.
#[test]
fn test_find_syntax_with_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Build the config from the shared `lang_cfg` helper instead of repeating
    // every field; only `comment_prefix` differs from the helper's defaults.
    let config = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["*.conf", "*rc"])
    };
    let mut languages = std::collections::HashMap::new();
    languages.insert("shell-configs".to_string(), config);
    registry.apply_language_config(&languages);

    assert!(
        registry
            .find_by_path(Path::new("nftables.conf"), None)
            .is_some(),
        "*.conf should match nftables.conf"
    );
    assert!(
        registry.find_by_path(Path::new("lfrc"), None).is_some(),
        "*rc should match lfrc"
    );
    // The lookup for a name matching neither glob is exercised, but its
    // result is deliberately not asserted.
    let _ = registry.find_by_path(Path::new("randomfile"), None);
}
1688
/// Path-style glob patterns (containing `/` and `**`) under `filenames` must
/// be honoured by `find_by_path`.
#[test]
fn test_find_syntax_with_path_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Build the config from the shared `lang_cfg` helper instead of repeating
    // every field; only `comment_prefix` differs from the helper's defaults.
    let config = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["/etc/**/rc.*"])
    };
    let mut languages = std::collections::HashMap::new();
    languages.insert("shell-configs".to_string(), config);
    registry.apply_language_config(&languages);

    assert!(
        registry
            .find_by_path(Path::new("/etc/rc.conf"), None)
            .is_some(),
        "/etc/**/rc.* should match /etc/rc.conf"
    );
    assert!(
        registry
            .find_by_path(Path::new("/etc/init/rc.local"), None)
            .is_some(),
        "/etc/**/rc.* should match /etc/init/rc.local"
    );
    // The lookup for a path outside `/etc` is exercised, but its result is
    // deliberately not asserted.
    let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
}
1733
/// When one language lists `lfrc` exactly and another lists the glob `*rc`,
/// `find_by_path("lfrc")` must pick the exact-match language.
#[test]
fn test_exact_filename_takes_priority_over_glob() {
    // Both configs come from the shared `lang_cfg` helper; only
    // `comment_prefix` differs from the helper's defaults.
    let with_hash_comments =
        |grammar: &str, filenames: &[&str]| crate::config::LanguageConfig {
            comment_prefix: Some("#".to_string()),
            ..lang_cfg(grammar, &[], filenames)
        };

    let mut registry = GrammarRegistry::default();
    let mut languages = std::collections::HashMap::new();

    // Exact filename -> python.
    languages.insert(
        "custom-lfrc".to_string(),
        with_hash_comments("python", &["lfrc"]),
    );
    // Glob that also matches "lfrc" -> bash.
    languages.insert("rc-files".to_string(), with_hash_comments("bash", &["*rc"]));

    registry.apply_language_config(&languages);

    let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
    assert!(
        entry.display_name.to_lowercase().contains("python"),
        "exact match should win over glob, got: {}",
        entry.display_name
    );
}
1801
/// Built-in short aliases must resolve to the expected full syntax names.
#[test]
fn test_built_in_aliases_resolve() {
    let registry = GrammarRegistry::default();

    // (alias, expected syntax name)
    let expectations = [
        ("bash", "Bourne Again Shell (bash)"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("sh", "Bourne Again Shell (bash)"),
        ("proto", "Protocol Buffers"),
    ];

    for (alias, canonical) in expectations {
        let syntax = registry.find_syntax_by_name(alias);
        assert!(syntax.is_some(), "alias '{}' should resolve", alias);
        assert_eq!(syntax.unwrap().name, canonical);
    }
}
1831
/// Alias lookup must ignore the case of the name passed in.
#[test]
fn test_alias_case_insensitive_input() {
    let registry = GrammarRegistry::default();

    for (typed, canonical) in [("BASH", "Bourne Again Shell (bash)"), ("Cpp", "C++")] {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(
            syntax.is_some(),
            "alias '{}' should resolve case-insensitively",
            typed
        );
        assert_eq!(syntax.unwrap().name, canonical);
    }
}
1851
/// The full syntax name must resolve both as written and in lowercase.
#[test]
fn test_full_name_still_works() {
    const FULL_NAME: &str = "Bourne Again Shell (bash)";
    let registry = GrammarRegistry::default();

    let exact = registry.find_syntax_by_name("Bourne Again Shell (bash)");
    assert!(exact.is_some(), "full name should still resolve");
    assert_eq!(exact.unwrap().name, FULL_NAME);

    let lowered = registry.find_syntax_by_name("bourne again shell (bash)");
    assert!(
        lowered.is_some(),
        "case-insensitive full name should resolve"
    );
    assert_eq!(lowered.unwrap().name, FULL_NAME);
}
1869
/// Lowercase lookups of real grammar names must return those grammars.
#[test]
fn test_alias_does_not_shadow_full_names() {
    let registry = GrammarRegistry::default();

    for (query, expected) in [("rust", "Rust"), ("go", "Go")] {
        let syntax = registry.find_syntax_by_name(query);
        assert!(syntax.is_some());
        assert_eq!(syntax.unwrap().name, expected);
    }
}
1884
/// An alias cannot be re-pointed at a different grammar, but registering it
/// again for the same grammar succeeds.
#[test]
fn test_register_alias_rejects_collision() {
    let mut registry = GrammarRegistry::default();

    let first = registry.register_alias("myalias", "Rust");
    assert!(first);

    let conflicting = registry.register_alias("myalias", "Go");
    assert!(!conflicting);

    let repeated = registry.register_alias("myalias", "Rust");
    assert!(repeated);
}
1896
/// Registering an alias for a grammar that does not exist must return `false`.
#[test]
fn test_register_alias_rejects_nonexistent_target() {
    let mut registry = GrammarRegistry::default();
    let accepted = registry.register_alias("nope", "Nonexistent Grammar");
    assert!(!accepted);
}
1902
/// Registering `rust` as an alias for "Rust" must return `false`, while the
/// lookup of `rust` keeps working.
#[test]
fn test_register_alias_skips_existing_grammar_name() {
    let mut registry = GrammarRegistry::default();

    let accepted = registry.register_alias("rust", "Rust");
    assert!(!accepted);
    assert!(registry.find_syntax_by_name("rust").is_some());
}
1912
/// The bash grammar's `GrammarInfo` must carry the short name `sh`.
#[test]
fn test_available_grammar_info_includes_short_names() {
    let infos = GrammarRegistry::default().available_grammar_info();

    let bash_info = infos
        .iter()
        .find(|g| g.name == "Bourne Again Shell (bash)")
        .expect("bash grammar should be in the list");
    assert!(
        bash_info.short_name.is_some(),
        "bash grammar should have a short_name"
    );
    assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
}
1928
/// Checks catalog invariants: display names are unique (ignoring case),
/// TypeScript is tree-sitter only, and Rust/Python/JavaScript have both
/// engines and resolve via their `language_id`.
#[test]
fn test_catalog_contains_each_language_once() {
    let registry = GrammarRegistry::default();

    // No two entries may share a display name, ignoring case.
    let mut seen = std::collections::HashSet::new();
    for entry in registry.catalog() {
        let is_new = seen.insert(entry.display_name.to_lowercase());
        assert!(
            is_new,
            "duplicate catalog entry for display_name={:?}",
            entry.display_name
        );
    }

    // TypeScript: tree-sitter only.
    let ts = registry
        .find_by_name("TypeScript")
        .expect("TypeScript must be in the catalog");
    assert!(ts.engines.syntect.is_none());
    assert_eq!(
        ts.engines.tree_sitter,
        Some(fresh_languages::Language::TypeScript)
    );
    assert_eq!(ts.language_id, "typescript");
    assert!(ts.extensions.iter().any(|e| e == "ts"));

    // Languages expected to be backed by both engines.
    for name in ["Rust", "Python", "JavaScript"] {
        let entry = registry
            .find_by_name(name)
            .unwrap_or_else(|| panic!("{} must be in the catalog", name));
        assert!(
            entry.engines.syntect.is_some(),
            "{} should have a syntect index",
            name
        );
        assert!(
            entry.engines.tree_sitter.is_some(),
            "{} should also have a tree-sitter language",
            name
        );
        // Looking the entry up by its language id must land on the same entry.
        let by_id = registry
            .find_by_name(&entry.language_id)
            .expect("language_id should resolve");
        assert_eq!(by_id.display_name, entry.display_name);
    }
}
1982
/// `find_by_path` and `find_by_extension` must resolve `.ts` and `rs` to the
/// TypeScript and Rust entries.
#[test]
fn test_catalog_find_by_path_and_extension() {
    let registry = GrammarRegistry::default();

    let by_path = registry
        .find_by_path(Path::new("foo.ts"), None)
        .expect("foo.ts should resolve");
    assert_eq!(by_path.display_name, "TypeScript");

    let by_ext = registry.find_by_extension("rs").expect("rs should resolve");
    assert_eq!(by_ext.display_name, "Rust");
}
1993
1994 fn lang_cfg(
1996 grammar: &str,
1997 extensions: &[&str],
1998 filenames: &[&str],
1999 ) -> crate::config::LanguageConfig {
2000 crate::config::LanguageConfig {
2001 extensions: extensions.iter().map(|s| s.to_string()).collect(),
2002 filenames: filenames.iter().map(|s| s.to_string()).collect(),
2003 grammar: grammar.to_string(),
2004 comment_prefix: None,
2005 auto_indent: true,
2006 auto_close: None,
2007 auto_surround: None,
2008 textmate_grammar: None,
2009 show_whitespace_tabs: true,
2010 line_wrap: None,
2011 wrap_column: None,
2012 page_view: None,
2013 page_width: None,
2014 use_tabs: None,
2015 tab_size: None,
2016 formatter: None,
2017 format_on_save: false,
2018 on_save: vec![],
2019 word_characters: None,
2020 }
2021 }
2022
/// A config key whose `grammar` is "Rust" must resolve to the Rust entry
/// through `find_by_name`.
#[test]
fn test_user_alias_resolves_via_find_by_name() {
    let mut registry = GrammarRegistry::default();
    let languages = HashMap::from([("mylang".to_string(), lang_cfg("Rust", &[], &[]))]);
    registry.apply_language_config(&languages);

    let resolved = registry
        .find_by_name("mylang")
        .expect("user-declared alias 'mylang' must resolve");
    assert_eq!(resolved.display_name, "Rust");
}
2038
/// Extensions and globs added by `apply_language_config` must still resolve
/// after a later `register_alias` call.
#[test]
fn test_register_alias_preserves_applied_language_config() {
    let mut registry = GrammarRegistry::default();
    let languages = HashMap::from([(
        "shell-configs".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    )]);
    registry.apply_language_config(&languages);

    let glob_target = Path::new("foo.myconf");

    // Before: both the extension and the glob resolve.
    assert!(registry.find_by_extension("myconf").is_some());
    assert!(
        registry.find_by_path(glob_target, None).is_some(),
        "glob should match before register_alias"
    );

    registry.register_alias("mycustom", "Rust");

    // After: both must still resolve.
    assert!(
        registry.find_by_extension("myconf").is_some(),
        "config extension must survive register_alias"
    );
    assert!(
        registry.find_by_path(glob_target, None).is_some(),
        "glob must survive register_alias"
    );
}
2075
/// Resolving the user-typed name "BASH" must yield the canonical display
/// name rather than echoing the input.
#[test]
fn test_from_syntax_name_preserves_canonical_display_name() {
    use crate::primitives::detected_language::DetectedLanguage;
    let registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::new();

    // Fixed: the argument was the mis-encoded `®istry`; it must be a borrow
    // of the local `registry`.
    let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
        .expect("BASH should resolve via alias");
    assert_eq!(
        detected.display_name, "Bourne Again Shell (bash)",
        "display_name must be canonical, not user-typed"
    );
}
2092
/// A language declared only in user config (`fish`) must get a catalog entry
/// with no syntect or tree-sitter engine.
#[test]
fn test_config_only_language_appears_in_catalog() {
    let mut registry = GrammarRegistry::default();
    let languages = HashMap::from([("fish".to_string(), lang_cfg("fish", &["fish"], &[]))]);
    registry.apply_language_config(&languages);

    let fish = registry
        .find_by_name("fish")
        .expect("fish should be in the catalog after apply_language_config");
    assert!(fish.engines.syntect.is_none());
    assert!(fish.engines.tree_sitter.is_none());
    assert_eq!(fish.language_id, "fish");
    assert!(fish.extensions.iter().any(|e| e == "fish"));
}
2112
/// A user config that maps `js` to TypeScript must replace the built-in
/// JavaScript mapping for that extension.
#[test]
fn test_config_extension_overrides_builtin() {
    let mut registry = GrammarRegistry::default();

    // Baseline: `js` is JavaScript.
    let before = registry.find_by_extension("js").unwrap();
    assert_eq!(before.display_name, "JavaScript");

    let languages = HashMap::from([(
        "ts-overlay".to_string(),
        lang_cfg("TypeScript", &["js"], &[]),
    )]);
    registry.apply_language_config(&languages);

    let after = registry.find_by_extension("js").unwrap();
    assert_eq!(
        after.display_name, "TypeScript",
        "user-config extension must win over built-in"
    );
}
2139
/// Well-known extensionless file names must resolve through `find_by_path`
/// to an entry whose display name contains the expected word.
#[test]
fn test_bare_filename_resolves_via_find_by_path() {
    // (file name, lowercase substring expected in the display name)
    const CASES: [(&str, &str); 5] = [
        ("Gemfile", "ruby"),
        ("Rakefile", "ruby"),
        ("Vagrantfile", "ruby"),
        ("Makefile", "makefile"),
        ("GNUmakefile", "makefile"),
    ];

    let registry = GrammarRegistry::default();
    for (filename, expected_substr) in CASES {
        let entry = registry
            .find_by_path(Path::new(filename), None)
            .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
        let lowered = entry.display_name.to_lowercase();
        assert!(
            lowered.contains(expected_substr),
            "{} should resolve to {} grammar, got {}",
            filename,
            expected_substr,
            entry.display_name
        );
    }
}
2168
/// A `.jsx` path must resolve to the JavaScript catalog entry.
#[test]
fn test_jsx_resolves_to_javascript() {
    let registry = GrammarRegistry::default();
    let resolved = registry
        .find_by_path(Path::new("foo.jsx"), None)
        .expect("foo.jsx must resolve");
    assert_eq!(resolved.display_name, "JavaScript");
}
2181
/// Extensions and globs from an applied language config must still resolve
/// after an explicit `rebuild_catalog`.
#[test]
fn test_rebuild_catalog_replays_language_config() {
    let mut registry = GrammarRegistry::default();
    let languages = HashMap::from([(
        "myshell".to_string(),
        lang_cfg("bash", &["myext"], &["*.myglob"]),
    )]);
    let glob_target = Path::new("foo.myglob");

    registry.apply_language_config(&languages);
    assert!(registry.find_by_extension("myext").is_some());
    assert!(registry.find_by_path(glob_target, None).is_some());

    registry.rebuild_catalog();

    assert!(
        registry.find_by_extension("myext").is_some(),
        "rebuild_catalog must replay applied user config"
    );
    assert!(
        registry.find_by_path(glob_target, None).is_some(),
        "rebuild_catalog must replay user globs"
    );
}
2214
/// Applying the same language config twice must not duplicate the extension
/// or the glob on the bash entry.
#[test]
fn test_apply_language_config_idempotent() {
    /// Counts how often `myconf` and `*.myconf` appear on the bash entry,
    /// returned as `(extensions, globs)`.
    fn occurrences(registry: &GrammarRegistry) -> (usize, usize) {
        let extension_count = registry
            .find_by_name("bash")
            .unwrap()
            .extensions
            .iter()
            .filter(|e| e == &"myconf")
            .count();
        let glob_count = registry
            .find_by_name("bash")
            .unwrap()
            .filename_globs
            .iter()
            .filter(|g| g == &"*.myconf")
            .count();
        (extension_count, glob_count)
    }

    let mut registry = GrammarRegistry::default();
    let mut languages = std::collections::HashMap::new();
    languages.insert(
        "shell-cfg".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    );

    registry.apply_language_config(&languages);
    let (first_extensions, first_globs) = occurrences(&registry);
    assert_eq!(first_extensions, 1);
    assert_eq!(first_globs, 1);

    // Second application of the identical config.
    registry.apply_language_config(&languages);
    let (second_extensions, second_globs) = occurrences(&registry);
    assert_eq!(second_extensions, 1, "extensions must not duplicate");
    assert_eq!(second_globs, 1, "globs must not duplicate");
}
2263
/// `tree_sitter_for_syntect_name` must map the bash and Rust syntect names
/// to their tree-sitter languages and return `None` for "Nushell" and for
/// an unknown name.
#[test]
fn test_tree_sitter_bridge() {
    use fresh_languages::Language;

    let lookup = tree_sitter_for_syntect_name;

    assert_eq!(lookup("Bourne Again Shell (bash)"), Some(Language::Bash));
    assert_eq!(lookup("Rust"), Some(Language::Rust));
    assert_eq!(lookup("Nushell"), None);
    assert_eq!(lookup("does-not-exist"), None);
}
2282}