1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file to be loaded into a registry: the language it
/// belongs to, where the file lives, and the extensions associated with it.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language identifier this grammar is loaded for.
    pub language: String,
    /// Filesystem path of the grammar file.
    pub path: PathBuf,
    /// File extensions associated with this grammar.
    pub extensions: Vec<String>,
}
30
/// Where a grammar came from.
///
/// Serialized by serde as an internally tagged enum: the variant is stored
/// under the `type` key using the kebab-case names given by the `rename`
/// attributes below.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Grammar shipped with the application (tagged `built-in`).
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Grammar supplied by the user from `path` (tagged `user`).
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Grammar provided by the language pack `name`, located at `path`
    /// (tagged `language-pack`).
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Grammar provided by the bundle `name`, located at `path`
    /// (tagged `bundle`).
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Grammar contributed by the plugin `plugin`, located at `path`
    /// (tagged `plugin`).
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of one grammar: its name, origin, extensions and
/// optional short alias.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name.
    pub name: String,
    /// Where the grammar was loaded from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias; defaults to `None` when absent on deserialization
    /// and is omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit `(syntect grammar name, tree-sitter language)` pairs.
///
/// `tree_sitter_for_syntect_name` consults this table before falling back to
/// comparing a name against each tree-sitter language's display name, so it
/// covers grammars whose syntect name differs from the tree-sitter one.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// Which highlighting back ends are available for a catalog entry.
/// The default value has neither engine set.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index into the registry's syntect `SyntaxSet::syntaxes()` slice, when
    /// a syntect grammar backs this entry.
    pub syntect: Option<usize>,
    /// Tree-sitter language backing this entry, when one exists.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One entry of the registry's unified grammar catalog.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Name shown to users (the syntect syntax name or the tree-sitter
    /// language's display name).
    pub display_name: String,
    /// Language identifier: the tree-sitter id when one maps to this grammar,
    /// otherwise the lowercased display name (or the config language id for
    /// entries created from language configuration).
    pub language_id: String,
    /// Shortest known alias for this grammar, if any.
    pub short_name: Option<String>,
    /// File extensions that resolve to this grammar.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this grammar.
    pub filenames: Vec<String>,
    /// Glob patterns (filename or path patterns) that resolve to this grammar.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Highlighting engines available for this grammar.
    pub engines: GrammarEngines,
}
142
// Embedded `.sublime-syntax` grammar sources. Each constant holds the full
// text of one grammar file, compiled into the binary with `include_str!`
// (paths are relative to this source file).

/// Embedded TOML grammar source.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar source.
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar source.
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded GDScript grammar source.
pub const GDSCRIPT_GRAMMAR: &str = include_str!("../../grammars/gdscript.sublime-syntax");

/// Embedded git rebase todo grammar source.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded git commit message grammar source.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded gitignore grammar source.
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded git config grammar source.
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded gitattributes grammar source.
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar source.
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

/// Embedded Dockerfile grammar source.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
/// Embedded INI grammar source.
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
/// Embedded CMake grammar source.
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
/// Embedded SCSS grammar source.
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
/// Embedded LESS grammar source.
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
/// Embedded PowerShell grammar source.
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
/// Embedded Kotlin grammar source.
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
/// Embedded Swift grammar source.
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
/// Embedded Dart grammar source.
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
/// Embedded Elixir grammar source.
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
/// Embedded F# grammar source.
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
/// Embedded Nix grammar source.
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
/// Embedded HCL grammar source.
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
/// Embedded Protocol Buffers grammar source.
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
/// Embedded GraphQL grammar source.
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
/// Embedded Julia grammar source.
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
/// Embedded Nim grammar source.
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
/// Embedded Gleam grammar source.
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
/// Embedded V grammar source.
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
/// Embedded Solidity grammar source.
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
/// Embedded KDL grammar source.
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
/// Embedded Nushell grammar source.
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
/// Embedded Starlark grammar source.
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
/// Embedded Justfile grammar source.
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
/// Embedded Earthfile grammar source.
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
/// Embedded Go module grammar source.
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
/// Embedded Vue grammar source.
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
/// Embedded Svelte grammar source.
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
/// Embedded Astro grammar source.
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
/// Embedded Hyprlang grammar source.
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
/// Embedded AutoHotkey grammar source.
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
/// Embedded Racket grammar source.
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
/// Embedded Verilog grammar source.
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
/// Embedded SystemVerilog grammar source.
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
/// Embedded VHDL grammar source.
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");

/// Embedded C3 grammar source.
pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");
249
250impl std::fmt::Debug for GrammarRegistry {
255 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256 f.debug_struct("GrammarRegistry")
257 .field("syntax_count", &self.syntax_set.syntaxes().len())
258 .finish()
259 }
260}
261
/// Registry of syntax grammars: the compiled syntect syntax set plus a unified
/// catalog (syntect and tree-sitter) with name/extension/filename indices.
pub struct GrammarRegistry {
    /// Compiled syntect syntax set, shared via `Arc`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra extension -> scope mappings, merged into the catalog on rebuild.
    user_extensions: HashMap<String, String>,
    /// Exact filename -> scope mappings, merged into the catalog on rebuild.
    filename_scopes: HashMap<String, String>,
    /// Grammar specs carried with the registry (presumably the grammar files
    /// loaded from disk so far; confirm against the loader).
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Provenance info keyed by syntax name; grammars without an entry are
    /// treated as built-in when the catalog is rebuilt.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Registered aliases: lowercased short name -> exact grammar name.
    aliases: HashMap<String, String>,
    /// Unified grammar catalog; the index maps below store positions in it.
    catalog: Vec<GrammarEntry>,
    /// Lowercased display name / language id / short name -> catalog index.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased file extension -> catalog index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact filename -> catalog index.
    catalog_by_filename: HashMap<String, usize>,
    /// Last language configuration applied; replayed after every catalog
    /// rebuild so the configuration survives the rebuild.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
    /// Counter incremented (wrapping) whenever the catalog is rebuilt or a
    /// language configuration is applied.
    catalog_gen: u64,
}
298
299impl GrammarRegistry {
300 pub(crate) fn new(
305 syntax_set: SyntaxSet,
306 user_extensions: HashMap<String, String>,
307 filename_scopes: HashMap<String, String>,
308 ) -> Self {
309 Self::new_with_loaded_paths(
310 syntax_set,
311 user_extensions,
312 filename_scopes,
313 Vec::new(),
314 HashMap::new(),
315 )
316 }
317
318 pub(crate) fn new_with_loaded_paths(
323 syntax_set: SyntaxSet,
324 user_extensions: HashMap<String, String>,
325 filename_scopes: HashMap<String, String>,
326 loaded_grammar_paths: Vec<GrammarSpec>,
327 grammar_sources: HashMap<String, GrammarInfo>,
328 ) -> Self {
329 let mut reg = Self {
330 syntax_set: Arc::new(syntax_set),
331 user_extensions,
332 filename_scopes,
333 loaded_grammar_paths,
334 grammar_sources,
335 aliases: HashMap::new(),
336 catalog: Vec::new(),
337 catalog_by_name: HashMap::new(),
338 catalog_by_extension: HashMap::new(),
339 catalog_by_filename: HashMap::new(),
340 applied_language_config: HashMap::new(),
341 catalog_gen: 0,
342 };
343 reg.rebuild_catalog();
344 reg
345 }
346
347 pub fn empty() -> Arc<Self> {
349 let mut builder = SyntaxSetBuilder::new();
350 builder.add_plain_text_syntax();
351 let mut reg = Self {
352 syntax_set: Arc::new(builder.build()),
353 user_extensions: HashMap::new(),
354 filename_scopes: HashMap::new(),
355 loaded_grammar_paths: Vec::new(),
356 grammar_sources: HashMap::new(),
357 aliases: HashMap::new(),
358 catalog: Vec::new(),
359 catalog_by_name: HashMap::new(),
360 catalog_by_extension: HashMap::new(),
361 catalog_by_filename: HashMap::new(),
362 applied_language_config: HashMap::new(),
363 catalog_gen: 0,
364 };
365 reg.rebuild_catalog();
366 Arc::new(reg)
367 }
368
369 pub fn defaults_only() -> Arc<Self> {
376 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
380 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
381 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
382 ))
383 .expect("Failed to load pre-compiled syntax packdump");
384 tracing::info!(
385 "defaults_only: loaded ({} syntaxes)",
386 syntax_set.syntaxes().len()
387 );
388 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
389 let filename_scopes = Self::build_filename_scopes();
390 let extra_extensions = Self::build_extra_extensions();
391 let mut registry = Self {
392 syntax_set: Arc::new(syntax_set),
393 user_extensions: extra_extensions,
394 filename_scopes,
395 loaded_grammar_paths: Vec::new(),
396 grammar_sources,
397 aliases: HashMap::new(),
398 catalog: Vec::new(),
399 catalog_by_name: HashMap::new(),
400 catalog_by_extension: HashMap::new(),
401 catalog_by_filename: HashMap::new(),
402 applied_language_config: HashMap::new(),
403 catalog_gen: 0,
404 };
405 registry.populate_built_in_aliases();
406 registry.rebuild_catalog();
407 Arc::new(registry)
408 }
409
/// Returns the built-in extra extension -> scope mappings.
///
/// Currently maps the `cjs` and `mjs` extensions to the `source.js` scope.
pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
    const JS_SCOPE: &str = "source.js";
    const JS_EXTENSIONS: &[&str] = &["cjs", "mjs"];

    JS_EXTENSIONS
        .iter()
        .map(|ext| ((*ext).to_string(), JS_SCOPE.to_string()))
        .collect()
}
427
/// Returns the built-in exact-filename -> scope mappings.
///
/// These cover files that are recognised by their whole name (for example
/// `Dockerfile`, `.gitignore`, `CMakeLists.txt`). Keys are matched exactly
/// as written, including case.
pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
    // One row per scope: every listed filename maps to that scope.
    const SCOPES_BY_FILENAME: &[(&str, &[&str])] = &[
        (
            "source.shell.bash",
            &[
                ".zshrc",
                ".zprofile",
                ".zshenv",
                ".zlogin",
                ".zlogout",
                ".bash_aliases",
                "PKGBUILD",
                "APKBUILD",
            ],
        ),
        ("source.git-rebase-todo", &["git-rebase-todo"]),
        (
            "source.git-commit",
            &["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"],
        ),
        (
            "source.gitignore",
            &[".gitignore", ".dockerignore", ".npmignore", ".hgignore"],
        ),
        ("source.gitconfig", &[".gitconfig", ".gitmodules"]),
        ("source.gitattributes", &[".gitattributes"]),
        ("source.groovy", &["Jenkinsfile"]),
        ("source.ruby", &["Brewfile"]),
        (
            "source.dockerfile",
            &[
                "Dockerfile",
                "Containerfile",
                "Dockerfile.dev",
                "Dockerfile.prod",
                "Dockerfile.test",
                "Dockerfile.build",
            ],
        ),
        ("source.cmake", &["CMakeLists.txt"]),
        (
            "source.starlark",
            &[
                "BUILD",
                "BUILD.bazel",
                "WORKSPACE",
                "WORKSPACE.bazel",
                "Tiltfile",
            ],
        ),
        ("source.justfile", &["justfile", "Justfile", ".justfile"]),
        ("source.ini", &[".editorconfig"]),
        ("source.earthfile", &["Earthfile"]),
        (
            "source.hyprlang",
            &["hyprland.conf", "hyprpaper.conf", "hyprlock.conf"],
        ),
        ("source.gomod", &["go.mod", "go.sum"]),
    ];

    let mut scopes = HashMap::new();
    for &(scope, filenames) in SCOPES_BY_FILENAME {
        for &filename in filenames {
            scopes.insert(filename.to_string(), scope.to_string());
        }
    }
    scopes
}
533
534 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
536 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
538 Ok(syntax) => {
539 builder.add(syntax);
540 tracing::debug!("Loaded embedded TOML grammar");
541 }
542 Err(e) => {
543 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
544 }
545 }
546
547 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
549 Ok(syntax) => {
550 builder.add(syntax);
551 tracing::debug!("Loaded embedded Odin grammar");
552 }
553 Err(e) => {
554 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
555 }
556 }
557
558 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
560 Ok(syntax) => {
561 builder.add(syntax);
562 tracing::debug!("Loaded embedded Zig grammar");
563 }
564 Err(e) => {
565 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
566 }
567 }
568
569 match SyntaxDefinition::load_from_str(GDSCRIPT_GRAMMAR, true, Some("GDScript")) {
571 Ok(syntax) => {
572 builder.add(syntax);
573 tracing::debug!("Loaded embedded GDScript grammar");
574 }
575 Err(e) => {
576 tracing::warn!("Failed to load embedded GDScript grammar: {}", e);
577 }
578 }
579
580 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
582 Ok(syntax) => {
583 builder.add(syntax);
584 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
585 }
586 Err(e) => {
587 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
588 }
589 }
590
591 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
593 {
594 Ok(syntax) => {
595 builder.add(syntax);
596 tracing::debug!("Loaded embedded Git Commit Message grammar");
597 }
598 Err(e) => {
599 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
600 }
601 }
602
603 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
605 Ok(syntax) => {
606 builder.add(syntax);
607 tracing::debug!("Loaded embedded Gitignore grammar");
608 }
609 Err(e) => {
610 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
611 }
612 }
613
614 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
616 Ok(syntax) => {
617 builder.add(syntax);
618 tracing::debug!("Loaded embedded Git Config grammar");
619 }
620 Err(e) => {
621 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
622 }
623 }
624
625 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
627 Ok(syntax) => {
628 builder.add(syntax);
629 tracing::debug!("Loaded embedded Git Attributes grammar");
630 }
631 Err(e) => {
632 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
633 }
634 }
635
636 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
638 Ok(syntax) => {
639 builder.add(syntax);
640 tracing::debug!("Loaded embedded Typst grammar");
641 }
642 Err(e) => {
643 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
644 }
645 }
646
647 let additional_grammars: &[(&str, &str)] = &[
649 (DOCKERFILE_GRAMMAR, "Dockerfile"),
650 (INI_GRAMMAR, "INI"),
651 (CMAKE_GRAMMAR, "CMake"),
652 (SCSS_GRAMMAR, "SCSS"),
653 (LESS_GRAMMAR, "LESS"),
654 (POWERSHELL_GRAMMAR, "PowerShell"),
655 (KOTLIN_GRAMMAR, "Kotlin"),
656 (SWIFT_GRAMMAR, "Swift"),
657 (DART_GRAMMAR, "Dart"),
658 (ELIXIR_GRAMMAR, "Elixir"),
659 (FSHARP_GRAMMAR, "FSharp"),
660 (NIX_GRAMMAR, "Nix"),
661 (HCL_GRAMMAR, "HCL"),
662 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
663 (GRAPHQL_GRAMMAR, "GraphQL"),
664 (JULIA_GRAMMAR, "Julia"),
665 (NIM_GRAMMAR, "Nim"),
666 (GLEAM_GRAMMAR, "Gleam"),
667 (VLANG_GRAMMAR, "V"),
668 (SOLIDITY_GRAMMAR, "Solidity"),
669 (KDL_GRAMMAR, "KDL"),
670 (NUSHELL_GRAMMAR, "Nushell"),
671 (STARLARK_GRAMMAR, "Starlark"),
672 (JUSTFILE_GRAMMAR, "Justfile"),
673 (EARTHFILE_GRAMMAR, "Earthfile"),
674 (GOMOD_GRAMMAR, "Go Module"),
675 (VUE_GRAMMAR, "Vue"),
676 (SVELTE_GRAMMAR, "Svelte"),
677 (ASTRO_GRAMMAR, "Astro"),
678 (HYPRLANG_GRAMMAR, "Hyprlang"),
679 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
680 (RACKET_GRAMMAR, "Racket"),
681 (VERILOG_GRAMMAR, "Verilog"),
682 (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
683 (VHDL_GRAMMAR, "VHDL"),
684 (C3_GRAMMAR, "C3"),
685 ];
686
687 for (grammar_str, name) in additional_grammars {
688 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
689 Ok(syntax) => {
690 builder.add(syntax);
691 tracing::debug!("Loaded embedded {} grammar", name);
692 }
693 Err(e) => {
694 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
695 }
696 }
697 }
698 }
699
700 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
706 let entry = self.find_by_path(path, None)?;
707 entry
708 .engines
709 .syntect
710 .map(|i| &self.syntax_set.syntaxes()[i])
711 }
712
713 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
721 if let Some(entry) = self.find_by_name(name) {
722 if let Some(idx) = entry.engines.syntect {
723 return Some(&self.syntax_set.syntaxes()[idx]);
724 }
725 }
726 self.syntax_set.find_syntax_by_name(name)
730 }
731
/// Returns the built-in `(short alias, full grammar name)` pairs.
///
/// The order is significant to callers that break ties by first occurrence,
/// so the table order must be preserved.
fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
    const BUILT_IN: &[(&str, &str)] = &[
        ("bash", "Bourne Again Shell (bash)"),
        ("shell", "Bourne Again Shell (bash)"),
        ("sh", "Bourne Again Shell (bash)"),
        ("c++", "C++"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("objc", "Objective-C"),
        ("objcpp", "Objective-C++"),
        ("regex", "Regular Expressions (Python)"),
        ("regexp", "Regular Expressions (Python)"),
        ("proto", "Protocol Buffers"),
        ("protobuf", "Protocol Buffers"),
        ("gomod", "Go Module"),
        ("git-rebase", "Git Rebase Todo"),
        ("git-commit", "Git Commit Message"),
        ("git-config", "Git Config"),
        ("git-attributes", "Git Attributes"),
        ("gitignore", "Gitignore"),
        ("fsharp", "FSharp"),
        ("f#", "FSharp"),
        ("terraform", "HCL"),
        ("tf", "HCL"),
        ("ts", "TypeScript"),
        ("js", "JavaScript"),
        ("py", "Python"),
        ("rb", "Ruby"),
        ("rs", "Rust"),
        ("md", "Markdown"),
        ("yml", "YAML"),
        ("dockerfile", "Dockerfile"),
    ];
    BUILT_IN.to_vec()
}
776
777 pub(crate) fn populate_built_in_aliases(&mut self) {
784 for (short, full) in Self::built_in_aliases() {
785 self.register_alias_inner(short, full, true);
786 }
787 self.rebuild_catalog();
788 }
789
790 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
800 if !self.register_alias_inner(short_name, full_name, false) {
801 return false;
802 }
803 let short_lower = short_name.to_lowercase();
804 let full_lower = full_name.to_lowercase();
805 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
806 self.catalog_by_name
807 .entry(short_lower.clone())
808 .or_insert(idx);
809 let entry = &mut self.catalog[idx];
810 let replace = match &entry.short_name {
811 None => true,
812 Some(existing) => short_name.len() < existing.len(),
813 };
814 if replace {
815 entry.short_name = Some(short_lower);
816 }
817 }
818 true
819 }
820
821 fn register_alias_inner(
822 &mut self,
823 short_name: &str,
824 full_name: &str,
825 is_built_in: bool,
826 ) -> bool {
827 let short_lower = short_name.to_lowercase();
828
829 let target_exists = self
831 .syntax_set
832 .syntaxes()
833 .iter()
834 .any(|s| s.name.eq_ignore_ascii_case(full_name));
835 if !target_exists {
836 if tree_sitter_for_syntect_name(full_name).is_some() {
840 return false;
841 }
842 if is_built_in {
843 tracing::warn!(
846 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
847 short_name, full_name
848 );
849 } else {
850 tracing::warn!(
851 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
852 short_name,
853 full_name
854 );
855 }
856 return false;
857 }
858
859 let collides_with_full_name = self
861 .syntax_set
862 .syntaxes()
863 .iter()
864 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
865 if collides_with_full_name {
866 tracing::debug!(
870 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
871 short_name
872 );
873 return false;
874 }
875
876 if let Some(existing_target) = self.aliases.get(&short_lower) {
878 if existing_target.eq_ignore_ascii_case(full_name) {
879 return true;
881 }
882 let msg = format!(
883 "Alias '{}' already maps to '{}', cannot remap to '{}'",
884 short_name, existing_target, full_name
885 );
886 if is_built_in {
887 panic!("[grammar-alias] Built-in alias collision: {}", msg);
888 } else {
889 tracing::warn!("[grammar-alias] {}", msg);
890 return false;
891 }
892 }
893
894 let exact_name = self
896 .syntax_set
897 .syntaxes()
898 .iter()
899 .find(|s| s.name.eq_ignore_ascii_case(full_name))
900 .map(|s| s.name.clone())
901 .unwrap();
902
903 self.aliases.insert(short_lower, exact_name);
904 true
905 }
906
/// Rebuilds the unified grammar catalog and its lookup indices from scratch.
///
/// Steps, in order:
/// 1. Compute the shortest alias per grammar (built-in table first, then
///    registered aliases).
/// 2. Add one entry per syntect syntax (except the skipped names below).
/// 3. Attach the filename -> scope and extension -> scope mappings to the
///    entries with the matching scope.
/// 4. Add tree-sitter-only entries for languages not covered by step 2.
/// 5. Rebuild the name / extension / filename indices.
/// 6. Replay the last applied language configuration, if any.
/// 7. Bump `catalog_gen`.
pub(crate) fn rebuild_catalog(&mut self) {
    // Lowercased full grammar name -> shortest alias seen so far.
    let mut short_by_full: HashMap<String, String> = HashMap::new();
    // Keeps the strictly shorter alias; on equal length the first recorded
    // alias wins.
    let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
        let key = full.to_lowercase();
        let keep = match map.get(&key) {
            None => true,
            Some(existing) => short.len() < existing.len(),
        };
        if keep {
            map.insert(key, short.to_string());
        }
    };
    // Built-ins are recorded first so they win length ties against
    // registered aliases.
    for (short, full) in Self::built_in_aliases() {
        record(&mut short_by_full, short, full);
    }
    for (short, full) in &self.aliases {
        record(&mut short_by_full, short, full);
    }

    // Language id: the tree-sitter id when the display name maps to a
    // tree-sitter language, otherwise the lowercased display name.
    let derive_language_id =
        |display_name: &str| -> (String, Option<fresh_languages::Language>) {
            let ts = tree_sitter_for_syntect_name(display_name);
            let id = ts
                .map(|l| l.id().to_string())
                .unwrap_or_else(|| display_name.to_lowercase());
            (id, ts)
        };

    let mut catalog: Vec<GrammarEntry> = Vec::new();
    // Syntect scope string -> index of its catalog entry.
    let mut scope_to_index: HashMap<String, usize> = HashMap::new();

    for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
        // These two syntect syntaxes get no syntect-backed entry.
        // NOTE(review): the reason for skipping "JavaScript" is not visible
        // here. It means `scope_to_index` never contains that syntax's scope,
        // so scope mappings targeting it (e.g. extra extensions mapped to
        // `source.js`) are not attached below - confirm this is intended.
        if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
            continue;
        }
        let (language_id, tree_sitter) = derive_language_id(&syntax.name);
        let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
        // Grammars without recorded provenance are treated as built-in.
        let source = self
            .grammar_sources
            .get(&syntax.name)
            .map(|info| info.source.clone())
            .unwrap_or(GrammarSource::BuiltIn);
        let entry_index = catalog.len();
        scope_to_index.insert(syntax.scope.to_string(), entry_index);

        // Start from syntect's extensions and append any tree-sitter
        // extensions that are not already present.
        let mut extensions = syntax.file_extensions.clone();
        if let Some(lang) = tree_sitter {
            for ext in lang.extensions() {
                let ext = ext.to_string();
                if !extensions.iter().any(|e| e == &ext) {
                    extensions.push(ext);
                }
            }
        }

        catalog.push(GrammarEntry {
            display_name: syntax.name.clone(),
            language_id,
            short_name,
            extensions,
            filenames: Vec::new(),
            filename_globs: Vec::new(),
            source,
            engines: GrammarEngines {
                // `idx` is the position in the syntax set, not in the catalog.
                syntect: Some(idx),
                tree_sitter,
            },
        });
    }

    // Attach exact filenames to the entry owning the mapped scope; mappings
    // whose scope has no entry are ignored.
    for (filename, scope) in &self.filename_scopes {
        if let Some(&idx) = scope_to_index.get(scope) {
            if !catalog[idx].filenames.iter().any(|f| f == filename) {
                catalog[idx].filenames.push(filename.clone());
            }
        }
    }

    // Same for the extra extension -> scope mappings.
    for (ext, scope) in &self.user_extensions {
        if let Some(&idx) = scope_to_index.get(scope) {
            if !catalog[idx].extensions.iter().any(|e| e == ext) {
                catalog[idx].extensions.push(ext.clone());
            }
        }
    }

    // Tree-sitter languages already attached to a syntect-backed entry.
    let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
        std::collections::HashSet::new();
    for entry in &catalog {
        if let Some(lang) = entry.engines.tree_sitter {
            ts_covered.insert(lang);
        }
    }
    // Every remaining tree-sitter language gets a tree-sitter-only entry.
    for lang in fresh_languages::Language::all() {
        if ts_covered.contains(lang) {
            continue;
        }
        let display_name = lang.display_name().to_string();
        let language_id = lang.id().to_string();
        let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
        let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
        catalog.push(GrammarEntry {
            display_name,
            language_id,
            short_name,
            extensions,
            filenames: Vec::new(),
            filename_globs: Vec::new(),
            source: GrammarSource::BuiltIn,
            engines: GrammarEngines {
                syntect: None,
                tree_sitter: Some(*lang),
            },
        });
    }

    let mut by_name: HashMap<String, usize> = HashMap::new();
    let mut by_extension: HashMap<String, usize> = HashMap::new();
    let mut by_filename: HashMap<String, usize> = HashMap::new();
    for (idx, entry) in catalog.iter().enumerate() {
        // Name keys use `insert`: on a key clash the later entry wins.
        by_name.insert(entry.display_name.to_lowercase(), idx);
        by_name.insert(entry.language_id.to_lowercase(), idx);
        if let Some(short) = &entry.short_name {
            by_name.insert(short.to_lowercase(), idx);
        }
        // Extension and filename keys use `or_insert`: the earlier entry wins.
        for ext in &entry.extensions {
            by_extension.entry(ext.to_lowercase()).or_insert(idx);
            // Each extension string is also indexed verbatim as a filename
            // (presumably because syntect lists whole filenames among its
            // `file_extensions` - TODO confirm).
            by_filename.entry(ext.clone()).or_insert(idx);
        }
        for filename in &entry.filenames {
            by_filename.entry(filename.clone()).or_insert(idx);
        }
    }

    self.catalog = catalog;
    self.catalog_by_name = by_name;
    self.catalog_by_extension = by_extension;
    self.catalog_by_filename = by_filename;

    if !self.applied_language_config.is_empty() {
        // Move the config out so `self` can be borrowed mutably while it is
        // replayed onto the fresh catalog, then put it back.
        let cfg = std::mem::take(&mut self.applied_language_config);
        self.apply_language_config_inner(&cfg);
        self.applied_language_config = cfg;
    }
    self.catalog_gen = self.catalog_gen.wrapping_add(1);
}
1114
/// Returns all catalog entries as a slice.
pub fn catalog(&self) -> &[GrammarEntry] {
    &self.catalog
}
1119
/// Returns the catalog generation counter, which is incremented (wrapping)
/// whenever the catalog is rebuilt or a language configuration is applied.
pub fn catalog_gen(&self) -> u64 {
    self.catalog_gen
}
1126
1127 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1133 self.catalog_by_name
1134 .get(&name.to_lowercase())
1135 .map(|&idx| &self.catalog[idx])
1136 }
1137
1138 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1159 let filename = path.file_name().and_then(|n| n.to_str());
1160 let path_str = path.to_str().unwrap_or("");
1161
1162 if let Some(name) = filename {
1163 if let Some(&idx) = self.catalog_by_filename.get(name) {
1164 return Some(&self.catalog[idx]);
1165 }
1166 }
1167
1168 if let Some(name) = filename {
1170 for entry in &self.catalog {
1171 for pattern in &entry.filename_globs {
1172 let matched = if is_path_pattern(pattern) {
1173 path_glob_matches(pattern, path_str)
1174 } else {
1175 filename_glob_matches(pattern, name)
1176 };
1177 if matched {
1178 return Some(entry);
1179 }
1180 }
1181 }
1182 }
1183
1184 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1185 if let Some(entry) = self.find_by_extension(ext) {
1186 return Some(entry);
1187 }
1188 }
1189
1190 let line = first_line?;
1195 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1196 self.find_by_name(&syntax.name)
1197 }
1198
1199 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1201 self.catalog_by_extension
1202 .get(&ext.to_lowercase())
1203 .map(|&idx| &self.catalog[idx])
1204 }
1205
/// Applies per-language configuration (grammar, extensions, filenames) on top
/// of the current catalog.
///
/// A copy of `languages` is stored so that `rebuild_catalog` can replay it
/// after the catalog is regenerated. The catalog generation counter is
/// incremented (wrapping).
pub fn apply_language_config(
    &mut self,
    languages: &HashMap<String, crate::config::LanguageConfig>,
) {
    // Remember the config first so a later rebuild can re-apply it.
    self.applied_language_config = languages.clone();
    self.apply_language_config_inner(languages);
    self.catalog_gen = self.catalog_gen.wrapping_add(1);
}
1226
1227 fn apply_language_config_inner(
1232 &mut self,
1233 languages: &HashMap<String, crate::config::LanguageConfig>,
1234 ) {
1235 for (lang_id, lang_cfg) in languages {
1236 let grammar_name = if lang_cfg.grammar.is_empty() {
1237 lang_id.as_str()
1238 } else {
1239 lang_cfg.grammar.as_str()
1240 };
1241
1242 let idx = self
1244 .catalog_by_name
1245 .get(&grammar_name.to_lowercase())
1246 .copied()
1247 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1248 .unwrap_or_else(|| {
1249 let idx = self.catalog.len();
1250 self.catalog.push(GrammarEntry {
1251 display_name: lang_id.clone(),
1252 language_id: lang_id.clone(),
1253 short_name: None,
1254 extensions: Vec::new(),
1255 filenames: Vec::new(),
1256 filename_globs: Vec::new(),
1257 source: GrammarSource::BuiltIn,
1258 engines: GrammarEngines::default(),
1259 });
1260 idx
1261 });
1262
1263 self.catalog_by_name
1268 .entry(lang_id.to_lowercase())
1269 .or_insert(idx);
1270
1271 for ext in &lang_cfg.extensions {
1272 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1273 self.catalog[idx].extensions.push(ext.clone());
1274 }
1275 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1277 }
1278 for filename in &lang_cfg.filenames {
1279 if is_glob_pattern(filename) {
1280 if !self.catalog[idx]
1281 .filename_globs
1282 .iter()
1283 .any(|f| f == filename)
1284 {
1285 self.catalog[idx].filename_globs.push(filename.clone());
1286 }
1287 } else {
1288 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1289 self.catalog[idx].filenames.push(filename.clone());
1290 }
1291 self.catalog_by_filename.insert(filename.clone(), idx);
1292 }
1293 }
1294 }
1295 }
1296
/// Borrows the shared syntect syntax set.
pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
    &self.syntax_set
}
1301
/// Returns a new `Arc` handle to the syntect syntax set (reference-count
/// bump; the set itself is not copied).
pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
    Arc::clone(&self.syntax_set)
}
1306
1307 pub fn available_syntaxes(&self) -> Vec<&str> {
1309 self.syntax_set
1310 .syntaxes()
1311 .iter()
1312 .map(|s| s.name.as_str())
1313 .collect()
1314 }
1315
1316 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1323 let mut result: Vec<GrammarInfo> = self
1324 .catalog
1325 .iter()
1326 .map(|entry| GrammarInfo {
1327 name: entry.display_name.clone(),
1328 source: entry.source.clone(),
1329 file_extensions: entry.extensions.clone(),
1330 short_name: entry.short_name.clone(),
1331 })
1332 .collect();
1333 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1334 result
1335 }
1336
1337 pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1339 &self.grammar_sources
1340 }
1341
1342 pub(crate) fn build_grammar_sources_from_syntax_set(
1346 syntax_set: &SyntaxSet,
1347 ) -> HashMap<String, GrammarInfo> {
1348 let mut sources = HashMap::new();
1349 for syntax in syntax_set.syntaxes() {
1350 sources.insert(
1351 syntax.name.clone(),
1352 GrammarInfo {
1353 name: syntax.name.clone(),
1354 source: GrammarSource::BuiltIn,
1355 file_extensions: syntax.file_extensions.clone(),
1356 short_name: None,
1357 },
1358 );
1359 }
1360 sources
1361 }
1362
/// Test-only view of the registry's `user_extensions` map.
#[cfg(test)]
pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
    let extension_map: &HashMap<String, String> = &self.user_extensions;
    extension_map
}
1368
/// Test-only view of the grammar specs recorded in `loaded_grammar_paths`.
#[cfg(test)]
pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
    self.loaded_grammar_paths.as_slice()
}
1374
1375 pub fn with_additional_grammars(
1389 base: &GrammarRegistry,
1390 additional: &[GrammarSpec],
1391 ) -> Option<Self> {
1392 tracing::info!(
1393 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1394 additional.len(),
1395 base.syntax_set.syntaxes().len()
1396 );
1397
1398 let mut builder = (*base.syntax_set).clone().into_builder();
1402
1403 let mut user_extensions = base.user_extensions.clone();
1405
1406 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1408
1409 let mut grammar_sources = base.grammar_sources.clone();
1411
1412 for spec in additional {
1414 tracing::info!(
1415 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1416 spec.language,
1417 spec.path,
1418 spec.extensions
1419 );
1420 match Self::load_grammar_file(&spec.path) {
1421 Ok(syntax) => {
1422 let scope = syntax.scope.to_string();
1423 let syntax_name = syntax.name.clone();
1424 tracing::info!(
1425 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1426 syntax_name,
1427 scope
1428 );
1429 builder.add(syntax);
1430 tracing::info!(
1431 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1432 spec.language,
1433 spec.path,
1434 spec.extensions
1435 );
1436 for ext in &spec.extensions {
1438 user_extensions.insert(ext.clone(), scope.clone());
1439 }
1440 grammar_sources.insert(
1442 syntax_name.clone(),
1443 GrammarInfo {
1444 name: syntax_name,
1445 source: GrammarSource::Plugin {
1446 plugin: spec.language.clone(),
1447 path: spec.path.clone(),
1448 },
1449 file_extensions: spec.extensions.clone(),
1450 short_name: None,
1451 },
1452 );
1453 loaded_grammar_paths.push(spec.clone());
1455 }
1456 Err(e) => {
1457 tracing::warn!(
1458 "Failed to load grammar for '{}' from {:?}: {}",
1459 spec.language,
1460 spec.path,
1461 e
1462 );
1463 }
1464 }
1465 }
1466
1467 let mut reg = Self {
1468 syntax_set: Arc::new(builder.build()),
1469 user_extensions,
1470 filename_scopes: base.filename_scopes.clone(),
1471 loaded_grammar_paths,
1472 grammar_sources,
1473 aliases: base.aliases.clone(),
1474 catalog: Vec::new(),
1475 catalog_by_name: HashMap::new(),
1476 catalog_by_extension: HashMap::new(),
1477 catalog_by_filename: HashMap::new(),
1478 applied_language_config: HashMap::new(),
1479 catalog_gen: 0,
1480 };
1481 reg.rebuild_catalog();
1482 Some(reg)
1483 }
1484
1485 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1491 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1492
1493 match ext {
1494 "sublime-syntax" => {
1495 let content = std::fs::read_to_string(path)
1496 .map_err(|e| format!("Failed to read file: {}", e))?;
1497 SyntaxDefinition::load_from_str(
1498 &content,
1499 true,
1500 path.file_stem().and_then(|s| s.to_str()),
1501 )
1502 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1503 }
1504 _ => Err(format!(
1505 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1506 ext
1507 )),
1508 }
1509 }
1510}
1511
1512impl Default for GrammarRegistry {
1513 fn default() -> Self {
1514 let defaults = SyntaxSet::load_defaults_newlines();
1516 let mut builder = defaults.into_builder();
1517 Self::add_embedded_grammars(&mut builder);
1518 let syntax_set = builder.build();
1519 let filename_scopes = Self::build_filename_scopes();
1520 let extra_extensions = Self::build_extra_extensions();
1521
1522 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1523 registry.populate_built_in_aliases();
1524 registry.rebuild_catalog();
1525 registry
1526 }
1527}
1528
/// The part of a package manifest that this module deserializes.
///
/// Only the `contributes` section is read; it is `None` when the manifest
/// omits it.
/// NOTE(review): presumably a VS Code-style `package.json` — confirm against
/// the code that loads it.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// Languages and grammars declared by the package, if present.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1536
/// The `contributes` section of a [`PackageManifest`].
///
/// Both lists default to empty when the corresponding key is missing.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations (identifier plus file extensions).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations (language, scope name and grammar path).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1544
/// One language entry inside [`Contributes::languages`].
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier; required.
    pub id: String,
    /// File extensions declared for the language; empty when omitted.
    /// NOTE(review): whether entries carry a leading dot is not visible here.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1551
/// One grammar entry inside [`Contributes::grammars`]. All fields are required.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language this grammar is declared for.
    /// NOTE(review): presumably matches a [`LanguageContribution::id`] — confirm.
    pub language: String,
    /// Scope name of the grammar; read from the manifest key `scopeName`.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file, kept as the raw manifest string.
    /// NOTE(review): presumably relative to the manifest — confirm against the caller.
    pub path: String,
}
1559
1560#[cfg(test)]
1561mod tests {
1562 use super::*;
1563
/// `GrammarRegistry::empty()` must still expose at least one syntax.
#[test]
fn test_empty_registry() {
    let registry = GrammarRegistry::empty();
    let syntax_names = registry.available_syntaxes();
    assert!(!syntax_names.is_empty());
}
1570
/// The default registry must expose at least one syntax.
#[test]
fn test_default_registry() {
    let registry = GrammarRegistry::default();
    let syntax_names = registry.available_syntaxes();
    assert!(!syntax_names.is_empty());
}
1577
/// Checks, per file name, whether `find_syntax_for_file` yields a syntax.
///
/// `test.js` and the made-up extension are expected to produce no match.
#[test]
fn test_find_syntax_for_common_extensions() {
    let registry = GrammarRegistry::default();

    // (file name, whether a syntax is expected)
    let expectations = [
        ("test.py", true),
        ("test.rs", true),
        ("test.js", false),
        ("test.json", true),
        ("test.md", true),
        ("test.html", true),
        ("test.css", true),
        ("test.gd", true),
        ("test.unknown_extension_xyz", false),
    ];

    for &(filename, should_exist) in &expectations {
        let found = registry.find_syntax_for_file(Path::new(filename)).is_some();
        assert_eq!(
            found, should_exist,
            "Expected {:?} for {}",
            should_exist, filename
        );
    }
}
1612
/// Every Racket-family extension must resolve to a syntax, and the catalog
/// entry for it must be named "Racket".
#[test]
fn test_racket_grammar_loaded() {
    let registry = GrammarRegistry::default();
    let racket_files = ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"];

    for filename in racket_files {
        let path = Path::new(filename);

        let syntax = registry.find_syntax_for_file(path);
        assert!(
            syntax.is_some(),
            "Racket grammar should be available for {}",
            filename
        );

        let entry = registry.find_by_path(path, None).unwrap();
        assert_eq!(entry.display_name, "Racket", "for {}", filename);
    }
}
1627
/// Two handles from `syntax_set_arc` must point at the same allocation.
#[test]
fn test_syntax_set_arc() {
    let registry = GrammarRegistry::default();
    let first_handle = registry.syntax_set_arc();
    let second_handle = registry.syntax_set_arc();
    let same_allocation = Arc::ptr_eq(&first_handle, &second_handle);
    assert!(same_allocation);
}
1636
/// Shell dotfiles must resolve to a syntax whose name mentions "bash" or
/// "shell" (case-insensitively).
#[test]
fn test_shell_dotfiles_detection() {
    let registry = GrammarRegistry::default();

    for filename in [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"] {
        let detected = registry.find_syntax_for_file(Path::new(filename));
        assert!(
            detected.is_some(),
            "{} should be detected as a syntax",
            filename
        );

        let syntax = detected.unwrap();
        let lowered = syntax.name.to_lowercase();
        let looks_like_shell = lowered.contains("bash") || lowered.contains("shell");
        assert!(
            looks_like_shell,
            "{} should be detected as shell/bash, got: {}",
            filename, syntax.name
        );
    }
}
1663
/// `PKGBUILD` and `APKBUILD` must resolve to a syntax whose name mentions
/// "bash" or "shell" (case-insensitively).
#[test]
fn test_pkgbuild_detection() {
    let registry = GrammarRegistry::default();

    for filename in ["PKGBUILD", "APKBUILD"] {
        let detected = registry.find_syntax_for_file(Path::new(filename));
        assert!(
            detected.is_some(),
            "{} should be detected as a syntax",
            filename
        );

        let syntax = detected.unwrap();
        let lowered = syntax.name.to_lowercase();
        let looks_like_shell = lowered.contains("bash") || lowered.contains("shell");
        assert!(
            looks_like_shell,
            "{} should be detected as shell/bash, got: {}",
            filename, syntax.name
        );
    }
}
1688
/// Glob patterns listed under `filenames` in a language config must be
/// honoured by `find_by_path`.
///
/// The final lookup for `randomfile` is executed but its result is
/// deliberately not asserted.
#[test]
fn test_find_syntax_with_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Same config as the shared helper produces, plus a `#` comment prefix.
    let shell_configs = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["*.conf", "*rc"])
    };
    let mut languages = std::collections::HashMap::new();
    languages.insert("shell-configs".to_string(), shell_configs);
    registry.apply_language_config(&languages);

    let conf_match = registry.find_by_path(Path::new("nftables.conf"), None);
    assert!(conf_match.is_some(), "*.conf should match nftables.conf");

    let rc_match = registry.find_by_path(Path::new("lfrc"), None);
    assert!(rc_match.is_some(), "*rc should match lfrc");

    let _ = registry.find_by_path(Path::new("randomfile"), None);
}
1733
/// A path-style glob (`/etc/**/rc.*`) in a language config must match files
/// directly under `/etc` as well as in nested directories.
///
/// The final lookup for `/var/rc.conf` is executed but its result is
/// deliberately not asserted.
#[test]
fn test_find_syntax_with_path_glob_filenames() {
    let mut registry = GrammarRegistry::default();

    // Same config as the shared helper produces, plus a `#` comment prefix.
    let shell_configs = crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..lang_cfg("bash", &["sh"], &["/etc/**/rc.*"])
    };
    let mut languages = std::collections::HashMap::new();
    languages.insert("shell-configs".to_string(), shell_configs);
    registry.apply_language_config(&languages);

    let direct_match = registry.find_by_path(Path::new("/etc/rc.conf"), None);
    assert!(
        direct_match.is_some(),
        "/etc/**/rc.* should match /etc/rc.conf"
    );

    let nested_match = registry.find_by_path(Path::new("/etc/init/rc.local"), None);
    assert!(
        nested_match.is_some(),
        "/etc/**/rc.* should match /etc/init/rc.local"
    );

    let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
}
1779
/// When one language claims the exact filename `lfrc` and another claims the
/// glob `*rc`, looking up `lfrc` must pick the exact-filename language.
#[test]
fn test_exact_filename_takes_priority_over_glob() {
    // Both configs equal the shared helper's output plus a `#` comment prefix.
    let with_hash_comments = |base: crate::config::LanguageConfig| crate::config::LanguageConfig {
        comment_prefix: Some("#".to_string()),
        ..base
    };

    let mut languages = std::collections::HashMap::new();
    // Exact filename, mapped to the Python grammar.
    languages.insert(
        "custom-lfrc".to_string(),
        with_hash_comments(lang_cfg("python", &[], &["lfrc"])),
    );
    // Glob that also matches `lfrc`, mapped to the bash grammar.
    languages.insert(
        "rc-files".to_string(),
        with_hash_comments(lang_cfg("bash", &[], &["*rc"])),
    );

    let mut registry = GrammarRegistry::default();
    registry.apply_language_config(&languages);

    let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
    let resolved_name = entry.display_name.to_lowercase();
    assert!(
        resolved_name.contains("python"),
        "exact match should win over glob, got: {}",
        entry.display_name
    );
}
1849
/// Each built-in short alias must resolve to the expected full syntax name.
#[test]
fn test_built_in_aliases_resolve() {
    let registry = GrammarRegistry::default();

    // (alias, expected syntax name)
    let cases = [
        ("bash", "Bourne Again Shell (bash)"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("sh", "Bourne Again Shell (bash)"),
        ("proto", "Protocol Buffers"),
    ];

    for (alias, expected_name) in cases {
        let syntax = registry.find_syntax_by_name(alias);
        assert!(syntax.is_some(), "alias '{}' should resolve", alias);
        assert_eq!(syntax.unwrap().name, expected_name);
    }
}
1879
/// Alias lookup must ignore the case of the name the caller passes in.
#[test]
fn test_alias_case_insensitive_input() {
    let registry = GrammarRegistry::default();

    // (alias as typed, expected syntax name)
    let cases = [("BASH", "Bourne Again Shell (bash)"), ("Cpp", "C++")];

    for (typed, expected_name) in cases {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(
            syntax.is_some(),
            "alias '{}' should resolve case-insensitively",
            typed
        );
        assert_eq!(syntax.unwrap().name, expected_name);
    }
}
1899
/// Looking a syntax up by its full name must work both with the exact
/// spelling and with a lowercased spelling.
#[test]
fn test_full_name_still_works() {
    let registry = GrammarRegistry::default();

    // (name as typed, failure message)
    let cases = [
        ("Bourne Again Shell (bash)", "full name should still resolve"),
        (
            "bourne again shell (bash)",
            "case-insensitive full name should resolve",
        ),
    ];

    for (typed, failure_message) in cases {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(syntax.is_some(), "{}", failure_message);
        assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
    }
}
1917
/// Lowercase lookups of "rust" and "go" must resolve to the grammars with
/// those names.
#[test]
fn test_alias_does_not_shadow_full_names() {
    let registry = GrammarRegistry::default();

    for (typed, expected_name) in [("rust", "Rust"), ("go", "Go")] {
        let syntax = registry.find_syntax_by_name(typed);
        assert!(syntax.is_some());
        assert_eq!(syntax.unwrap().name, expected_name);
    }
}
1932
/// An alias can be registered once, cannot be re-pointed at a different
/// grammar, and re-registering it with the original target succeeds.
#[test]
fn test_register_alias_rejects_collision() {
    let mut registry = GrammarRegistry::default();

    let first_registration = registry.register_alias("myalias", "Rust");
    assert!(first_registration);

    let conflicting_registration = registry.register_alias("myalias", "Go");
    assert!(!conflicting_registration);

    let repeated_registration = registry.register_alias("myalias", "Rust");
    assert!(repeated_registration);
}
1944
/// Registering an alias for a grammar that does not exist must be refused.
#[test]
fn test_register_alias_rejects_nonexistent_target() {
    let mut registry = GrammarRegistry::default();
    let accepted = registry.register_alias("nope", "Nonexistent Grammar");
    assert!(!accepted);
}
1950
/// Registering the alias "rust" for the grammar "Rust" must report `false`,
/// while looking up "rust" by name keeps working.
#[test]
fn test_register_alias_skips_existing_grammar_name() {
    let mut registry = GrammarRegistry::default();

    let accepted = registry.register_alias("rust", "Rust");
    assert!(!accepted);

    let still_resolves = registry.find_syntax_by_name("rust").is_some();
    assert!(still_resolves);
}
1960
/// The grammar list must contain the bash grammar, and its record must carry
/// the short name "sh".
#[test]
fn test_available_grammar_info_includes_short_names() {
    let registry = GrammarRegistry::default();
    let infos = registry.available_grammar_info();

    let lookup = infos
        .iter()
        .find(|info| info.name == "Bourne Again Shell (bash)");
    assert!(lookup.is_some(), "bash grammar should be in the list");

    let bash = lookup.unwrap();
    assert!(
        bash.short_name.is_some(),
        "bash grammar should have a short_name"
    );
    assert_eq!(bash.short_name.as_deref(), Some("sh"));
}
1976
/// Catalog invariants for the default registry:
///
/// * no two entries share a display name (case-insensitively);
/// * TypeScript and JavaScript carry only a tree-sitter language;
/// * Rust and Python carry both engines and resolve via their language id;
/// * GDScript (`player.gd`) carries only a syntect grammar.
#[test]
fn test_catalog_contains_each_language_once() {
    let registry = GrammarRegistry::default();

    let mut seen_names = std::collections::HashSet::new();
    for entry in registry.catalog() {
        let first_occurrence = seen_names.insert(entry.display_name.to_lowercase());
        assert!(
            first_occurrence,
            "duplicate catalog entry for display_name={:?}",
            entry.display_name
        );
    }

    let typescript = registry
        .find_by_name("TypeScript")
        .expect("TypeScript must be in the catalog");
    assert!(typescript.engines.syntect.is_none());
    assert_eq!(
        typescript.engines.tree_sitter,
        Some(fresh_languages::Language::TypeScript)
    );
    assert_eq!(typescript.language_id, "typescript");
    assert!(typescript.extensions.iter().any(|ext| ext == "ts"));

    for name in ["Rust", "Python"] {
        let entry = registry
            .find_by_name(name)
            .unwrap_or_else(|| panic!("{} must be in the catalog", name));
        assert!(
            entry.engines.syntect.is_some(),
            "{} should have a syntect index",
            name
        );
        assert!(
            entry.engines.tree_sitter.is_some(),
            "{} should also have a tree-sitter language",
            name
        );

        // The language id must lead back to the very same entry.
        let via_language_id = registry
            .find_by_name(&entry.language_id)
            .expect("language_id should resolve");
        assert_eq!(via_language_id.display_name, entry.display_name);
    }

    let javascript = registry
        .find_by_name("JavaScript")
        .expect("JavaScript must be in the catalog");
    assert!(
        javascript.engines.syntect.is_none(),
        "JavaScript must not be routed to the syntect engine (issue #899)"
    );
    assert_eq!(
        javascript.engines.tree_sitter,
        Some(fresh_languages::Language::JavaScript),
        "JavaScript must carry the tree-sitter language"
    );

    let gdscript = registry
        .find_by_path(Path::new("player.gd"), None)
        .expect("player.gd should resolve to GDScript");
    assert_eq!(gdscript.display_name, "GDScript");
    assert_eq!(gdscript.language_id, "gdscript");
    assert!(
        gdscript.engines.syntect.is_some(),
        "GDScript should use the embedded Syntect grammar"
    );
    assert!(
        gdscript.engines.tree_sitter.is_none(),
        "GDScript must not carry a tree-sitter parser"
    );
}
2062
/// `foo.ts` must resolve to TypeScript by path, and the extension `rs` must
/// resolve to Rust.
#[test]
fn test_catalog_find_by_path_and_extension() {
    let registry = GrammarRegistry::default();

    let by_path = registry
        .find_by_path(Path::new("foo.ts"), None)
        .expect("foo.ts should resolve");
    assert_eq!(by_path.display_name, "TypeScript");

    let by_extension = registry.find_by_extension("rs").expect("rs should resolve");
    assert_eq!(by_extension.display_name, "Rust");
}
2073
2074 fn lang_cfg(
2076 grammar: &str,
2077 extensions: &[&str],
2078 filenames: &[&str],
2079 ) -> crate::config::LanguageConfig {
2080 crate::config::LanguageConfig {
2081 extensions: extensions.iter().map(|s| s.to_string()).collect(),
2082 filenames: filenames.iter().map(|s| s.to_string()).collect(),
2083 grammar: grammar.to_string(),
2084 comment_prefix: None,
2085 auto_indent: true,
2086 auto_close: None,
2087 auto_surround: None,
2088 textmate_grammar: None,
2089 show_whitespace_tabs: true,
2090 line_wrap: None,
2091 wrap_column: None,
2092 page_view: None,
2093 page_width: None,
2094 use_tabs: None,
2095 tab_size: None,
2096 formatter: None,
2097 format_on_save: false,
2098 on_save: vec![],
2099 word_characters: None,
2100 indent: None,
2101 }
2102 }
2103
/// A language id declared in user config (`mylang`, grammar "Rust") must be
/// resolvable through `find_by_name` and lead to the Rust entry.
#[test]
fn test_user_alias_resolves_via_find_by_name() {
    let mut languages = std::collections::HashMap::new();
    languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));

    let mut registry = GrammarRegistry::default();
    registry.apply_language_config(&languages);

    let resolved = registry
        .find_by_name("mylang")
        .expect("user-declared alias 'mylang' must resolve");
    assert_eq!(resolved.display_name, "Rust");
}
2119
/// Extensions and globs applied from user config must still resolve after a
/// later `register_alias` call.
#[test]
fn test_register_alias_preserves_applied_language_config() {
    let mut languages = std::collections::HashMap::new();
    languages.insert(
        "shell-configs".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    );

    let mut registry = GrammarRegistry::default();
    registry.apply_language_config(&languages);

    // Baseline: both lookups work right after the config is applied.
    assert!(registry.find_by_extension("myconf").is_some());
    let glob_before = registry.find_by_path(Path::new("foo.myconf"), None);
    assert!(
        glob_before.is_some(),
        "glob should match before register_alias"
    );

    registry.register_alias("mycustom", "Rust");

    let extension_after = registry.find_by_extension("myconf");
    assert!(
        extension_after.is_some(),
        "config extension must survive register_alias"
    );
    let glob_after = registry.find_by_path(Path::new("foo.myconf"), None);
    assert!(glob_after.is_some(), "glob must survive register_alias");
}
2156
/// Resolving the user-typed name "BASH" through
/// `DetectedLanguage::from_syntax_name` must yield the canonical display name
/// of the grammar, not the spelling the user typed.
#[test]
fn test_from_syntax_name_preserves_canonical_display_name() {
    use crate::primitives::detected_language::DetectedLanguage;
    let registry = GrammarRegistry::default();
    let languages = std::collections::HashMap::new();

    // The registry is passed by reference; the argument had been garbled
    // into `®istry`, which is not valid Rust.
    let detected = DetectedLanguage::from_syntax_name("BASH", &registry, &languages)
        .expect("BASH should resolve via alias");
    assert_eq!(
        detected.display_name, "Bourne Again Shell (bash)",
        "display_name must be canonical, not user-typed"
    );
}
2173
/// A language that exists only in user config ("fish") must get a catalog
/// entry with no engine attached, its own language id, and its extension.
#[test]
fn test_config_only_language_appears_in_catalog() {
    let mut languages = std::collections::HashMap::new();
    languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));

    let mut registry = GrammarRegistry::default();
    registry.apply_language_config(&languages);

    let fish = registry
        .find_by_name("fish")
        .expect("fish should be in the catalog after apply_language_config");
    assert!(fish.engines.syntect.is_none());
    assert!(fish.engines.tree_sitter.is_none());
    assert_eq!(fish.language_id, "fish");
    assert!(fish.extensions.iter().any(|ext| ext == "fish"));
}
2193
/// An extension claimed in user config must override the built-in mapping:
/// `js` starts out as JavaScript and becomes TypeScript after the config is
/// applied.
#[test]
fn test_config_extension_overrides_builtin() {
    let mut registry = GrammarRegistry::default();

    let builtin = registry.find_by_extension("js").unwrap();
    assert_eq!(builtin.display_name, "JavaScript");

    let mut languages = std::collections::HashMap::new();
    languages.insert(
        "ts-overlay".to_string(),
        lang_cfg("TypeScript", &["js"], &[]),
    );
    registry.apply_language_config(&languages);

    let overridden = registry.find_by_extension("js").unwrap();
    assert_eq!(
        overridden.display_name, "TypeScript",
        "user-config extension must win over built-in"
    );
}
2220
/// Well-known extensionless file names must resolve through `find_by_path`
/// to an entry whose display name contains the expected language.
#[test]
fn test_bare_filename_resolves_via_find_by_path() {
    let registry = GrammarRegistry::default();

    // (file name, substring expected in the lowercased display name)
    let cases = [
        ("Gemfile", "ruby"),
        ("Rakefile", "ruby"),
        ("Vagrantfile", "ruby"),
        ("Makefile", "makefile"),
        ("GNUmakefile", "makefile"),
    ];

    for (filename, expected_substr) in cases {
        let entry = registry
            .find_by_path(Path::new(filename), None)
            .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
        let lowered = entry.display_name.to_lowercase();
        assert!(
            lowered.contains(expected_substr),
            "{} should resolve to {} grammar, got {}",
            filename,
            expected_substr,
            entry.display_name
        );
    }
}
2249
/// A `.jsx` file must resolve to the JavaScript catalog entry.
#[test]
fn test_jsx_resolves_to_javascript() {
    let registry = GrammarRegistry::default();
    let resolved = registry.find_by_path(Path::new("foo.jsx"), None);
    let entry = resolved.expect("foo.jsx must resolve");
    assert_eq!(entry.display_name, "JavaScript");
}
2262
/// User config applied before `rebuild_catalog` must still be in effect
/// afterwards, for both extensions and filename globs.
#[test]
fn test_rebuild_catalog_replays_language_config() {
    let mut languages = std::collections::HashMap::new();
    languages.insert(
        "myshell".to_string(),
        lang_cfg("bash", &["myext"], &["*.myglob"]),
    );

    let mut registry = GrammarRegistry::default();
    registry.apply_language_config(&languages);

    // Baseline before the rebuild.
    assert!(registry.find_by_extension("myext").is_some());
    assert!(registry
        .find_by_path(Path::new("foo.myglob"), None)
        .is_some());

    registry.rebuild_catalog();

    let extension_after = registry.find_by_extension("myext");
    assert!(
        extension_after.is_some(),
        "rebuild_catalog must replay applied user config"
    );
    let glob_after = registry.find_by_path(Path::new("foo.myglob"), None);
    assert!(
        glob_after.is_some(),
        "rebuild_catalog must replay user globs"
    );
}
2295
/// Applying the same language config twice must not duplicate the extension
/// or the glob on the bash entry.
#[test]
fn test_apply_language_config_idempotent() {
    // Counts how often the configured extension and glob appear on the
    // entry that `find_by_name("bash")` returns.
    let count_occurrences = |registry: &GrammarRegistry| -> (usize, usize) {
        let extension_count = registry
            .find_by_name("bash")
            .unwrap()
            .extensions
            .iter()
            .filter(|ext| ext.as_str() == "myconf")
            .count();
        let glob_count = registry
            .find_by_name("bash")
            .unwrap()
            .filename_globs
            .iter()
            .filter(|glob| glob.as_str() == "*.myconf")
            .count();
        (extension_count, glob_count)
    };

    let mut languages = std::collections::HashMap::new();
    languages.insert(
        "shell-cfg".to_string(),
        lang_cfg("bash", &["myconf"], &["*.myconf"]),
    );

    let mut registry = GrammarRegistry::default();

    registry.apply_language_config(&languages);
    let (first_extensions, first_globs) = count_occurrences(&registry);
    assert_eq!(first_extensions, 1);
    assert_eq!(first_globs, 1);

    registry.apply_language_config(&languages);
    let (second_extensions, second_globs) = count_occurrences(&registry);
    assert_eq!(second_extensions, 1, "extensions must not duplicate");
    assert_eq!(second_globs, 1, "globs must not duplicate");
}
2344
/// A trailing `'` in Julia source (`x = A'`) must not open a string that
/// swallows the following lines.
///
/// The test parses three lines with the Julia grammar, tracks the scope
/// stack character by character, and checks that the `function` keyword on
/// the second line is reached outside any `string.*` scope.
#[test]
fn test_julia_adjoint_does_not_start_string() {
    use syntect::parsing::{ParseState, ScopeStack};

    let registry = GrammarRegistry::default();
    let syntax_set = registry.syntax_set();
    let julia = registry
        .find_syntax_by_name("Julia")
        .expect("Julia grammar must be loaded");
    let mut parser = ParseState::new(julia);
    let mut scopes = ScopeStack::new();

    let source_lines = ["x = A'\n", "function foo()\n", "end\n"];
    let mut keyword_line_in_string = false;
    let mut found_function_keyword = false;

    for line in &source_lines {
        let ops = parser.parse_line(line, syntax_set).unwrap();
        let mut pending_ops = ops.iter().peekable();

        for (byte_idx, _) in line.char_indices() {
            // Bring the scope stack up to date for this byte offset.
            while let Some((_, op)) = pending_ops.next_if(|(offset, _)| *offset <= byte_idx) {
                scopes.apply(op).unwrap();
            }

            if !line[byte_idx..].starts_with("function") {
                continue;
            }

            let in_string = scopes
                .as_slice()
                .iter()
                .any(|scope| scope.build_string().starts_with("string."));
            if in_string {
                keyword_line_in_string = true;
            } else {
                found_function_keyword = true;
            }
        }

        // Apply whatever is left so the stack carries over to the next line.
        for (_, op) in pending_ops {
            scopes.apply(op).unwrap();
        }
    }

    assert!(
        !keyword_line_in_string,
        "the `function` keyword after an adjoint operator must not be inside a string scope"
    );
    assert!(
        found_function_keyword,
        "test harness must have reached the `function` keyword"
    );
}
2407
/// A Julia character literal (`'a'`) must still be scoped as a constant or
/// string.
///
/// The scope stack is inspected at byte offset 5 of `x = 'a'`, which is the
/// `a` between the quotes.
#[test]
fn test_julia_char_literal_is_recognized() {
    use syntect::parsing::{ParseState, ScopeStack};

    let registry = GrammarRegistry::default();
    let syntax_set = registry.syntax_set();
    let julia = registry
        .find_syntax_by_name("Julia")
        .expect("Julia grammar must be loaded");
    let mut parser = ParseState::new(julia);
    let mut scopes = ScopeStack::new();

    let line = "x = 'a'\n";
    let ops = parser.parse_line(line, syntax_set).unwrap();
    let mut pending_ops = ops.iter().peekable();
    let mut saw_constant_or_string_at_quote = false;

    for (byte_idx, _) in line.char_indices() {
        // Bring the scope stack up to date for this byte offset.
        while let Some((_, op)) = pending_ops.next_if(|(offset, _)| *offset <= byte_idx) {
            scopes.apply(op).unwrap();
        }

        if byte_idx != 5 {
            continue;
        }

        let scoped = scopes
            .as_slice()
            .iter()
            .map(|scope| scope.build_string())
            .any(|name| name.starts_with("constant.") || name.starts_with("string."));
        if scoped {
            saw_constant_or_string_at_quote = true;
        }
    }

    assert!(
        saw_constant_or_string_at_quote,
        "char literal 'a' must receive a constant/string scope"
    );
}
2451
/// Checks the mapping from syntect display names to tree-sitter languages,
/// including names that must map to nothing.
#[test]
fn test_tree_sitter_bridge() {
    // (syntect display name, expected tree-sitter language)
    let expectations = [
        (
            "Bourne Again Shell (bash)",
            Some(fresh_languages::Language::Bash),
        ),
        ("Rust", Some(fresh_languages::Language::Rust)),
        ("GDScript", None),
        ("Nushell", None),
        ("does-not-exist", None),
    ];

    for (display_name, expected) in expectations {
        assert_eq!(tree_sitter_for_syntect_name(display_name), expected);
    }
}
2471}