1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes one grammar file that can be loaded on top of an existing registry.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Language name the grammar is being loaded for.
    pub language: String,
    /// Location of the grammar file to load.
    pub path: PathBuf,
    /// File extensions that should be mapped to the loaded grammar's scope.
    pub extensions: Vec<String>,
}
30
/// Origin of a grammar.
///
/// Serialized as an internally tagged enum: the variant is written to a `type`
/// field using the kebab-case names given by the `rename` attributes below.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// No external file is associated with the grammar.
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Grammar attributed to the user, identified by its file path.
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Grammar attributed to a named language pack.
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Grammar attributed to a named bundle.
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Grammar attributed to a named plugin.
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of a grammar: its name, origin and file extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name (the catalog's display name / syntect syntax name).
    pub name: String,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias; omitted from serialized output when `None` and
    /// defaulted to `None` when absent on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit syntect-name -> tree-sitter-language pairs, consulted by
/// `tree_sitter_for_syntect_name` before it falls back to comparing against
/// each tree-sitter language's own display name.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// The highlighting engines available for one catalog entry.
///
/// The `Default` value has neither engine.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index of the grammar in the registry's `SyntaxSet::syntaxes()` slice, if
    /// a syntect grammar exists for the entry.
    pub syntect: Option<usize>,
    /// Matching tree-sitter language, if any.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One row of the registry's grammar catalog.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Name shown to users: the syntect syntax name, the tree-sitter display
    /// name, or (for config-only entries) the configured language id.
    pub display_name: String,
    /// Tree-sitter language id when one matches, otherwise the lower-cased
    /// display name (or the configured language id for config-only entries).
    pub language_id: String,
    /// Shortest known alias for the grammar, if any.
    pub short_name: Option<String>,
    /// Extensions that resolve to this entry.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (from language configuration) that resolve to this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Engines that can highlight this entry.
    pub engines: GrammarEngines,
}
142
// Embedded `.sublime-syntax` grammar sources. Each constant holds the text of
// a grammar file pulled in at compile time with `include_str!`; they are
// parsed and registered by `GrammarRegistry::add_embedded_grammars`.

/// Embedded TOML grammar source.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar source.
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar source.
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded git rebase-todo grammar source.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded git commit-message grammar source.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded gitignore grammar source.
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded git config grammar source.
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded gitattributes grammar source.
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar source.
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

// Further embedded grammar sources; the constant name identifies the language.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");

/// Embedded C3 grammar source.
pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");
245
246impl std::fmt::Debug for GrammarRegistry {
251 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
252 f.debug_struct("GrammarRegistry")
253 .field("syntax_count", &self.syntax_set.syntaxes().len())
254 .finish()
255 }
256}
257
/// Registry of syntect grammars plus a derived, searchable catalog that also
/// covers tree-sitter languages and user language configuration.
pub struct GrammarRegistry {
    /// The compiled syntect syntaxes, shared behind an `Arc`.
    syntax_set: Arc<SyntaxSet>,
    /// Extra extension -> scope mappings merged into the catalog on rebuild.
    user_extensions: HashMap<String, String>,
    /// Exact filename -> scope mappings merged into the catalog on rebuild.
    filename_scopes: HashMap<String, String>,
    /// Specs of grammar files loaded so far.
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Per-grammar provenance, keyed by syntax name.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lower-cased alias -> exact syntect syntax name.
    aliases: HashMap<String, String>,
    /// The derived catalog; rebuilt by `rebuild_catalog`.
    catalog: Vec<GrammarEntry>,
    /// Lower-cased name (display name, language id, alias) -> catalog index.
    catalog_by_name: HashMap<String, usize>,
    /// Lower-cased extension -> catalog index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact filename -> catalog index.
    catalog_by_filename: HashMap<String, usize>,
    /// Last language configuration passed to `apply_language_config`; replayed
    /// after every catalog rebuild.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
    /// Counter bumped (wrapping) whenever the catalog is rebuilt or a language
    /// configuration is applied.
    catalog_gen: u64,
}
294
295impl GrammarRegistry {
296 pub(crate) fn new(
301 syntax_set: SyntaxSet,
302 user_extensions: HashMap<String, String>,
303 filename_scopes: HashMap<String, String>,
304 ) -> Self {
305 Self::new_with_loaded_paths(
306 syntax_set,
307 user_extensions,
308 filename_scopes,
309 Vec::new(),
310 HashMap::new(),
311 )
312 }
313
314 pub(crate) fn new_with_loaded_paths(
319 syntax_set: SyntaxSet,
320 user_extensions: HashMap<String, String>,
321 filename_scopes: HashMap<String, String>,
322 loaded_grammar_paths: Vec<GrammarSpec>,
323 grammar_sources: HashMap<String, GrammarInfo>,
324 ) -> Self {
325 let mut reg = Self {
326 syntax_set: Arc::new(syntax_set),
327 user_extensions,
328 filename_scopes,
329 loaded_grammar_paths,
330 grammar_sources,
331 aliases: HashMap::new(),
332 catalog: Vec::new(),
333 catalog_by_name: HashMap::new(),
334 catalog_by_extension: HashMap::new(),
335 catalog_by_filename: HashMap::new(),
336 applied_language_config: HashMap::new(),
337 catalog_gen: 0,
338 };
339 reg.rebuild_catalog();
340 reg
341 }
342
343 pub fn empty() -> Arc<Self> {
345 let mut builder = SyntaxSetBuilder::new();
346 builder.add_plain_text_syntax();
347 let mut reg = Self {
348 syntax_set: Arc::new(builder.build()),
349 user_extensions: HashMap::new(),
350 filename_scopes: HashMap::new(),
351 loaded_grammar_paths: Vec::new(),
352 grammar_sources: HashMap::new(),
353 aliases: HashMap::new(),
354 catalog: Vec::new(),
355 catalog_by_name: HashMap::new(),
356 catalog_by_extension: HashMap::new(),
357 catalog_by_filename: HashMap::new(),
358 applied_language_config: HashMap::new(),
359 catalog_gen: 0,
360 };
361 reg.rebuild_catalog();
362 Arc::new(reg)
363 }
364
365 pub fn defaults_only() -> Arc<Self> {
372 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
376 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
377 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
378 ))
379 .expect("Failed to load pre-compiled syntax packdump");
380 tracing::info!(
381 "defaults_only: loaded ({} syntaxes)",
382 syntax_set.syntaxes().len()
383 );
384 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
385 let filename_scopes = Self::build_filename_scopes();
386 let extra_extensions = Self::build_extra_extensions();
387 let mut registry = Self {
388 syntax_set: Arc::new(syntax_set),
389 user_extensions: extra_extensions,
390 filename_scopes,
391 loaded_grammar_paths: Vec::new(),
392 grammar_sources,
393 aliases: HashMap::new(),
394 catalog: Vec::new(),
395 catalog_by_name: HashMap::new(),
396 catalog_by_extension: HashMap::new(),
397 catalog_by_filename: HashMap::new(),
398 applied_language_config: HashMap::new(),
399 catalog_gen: 0,
400 };
401 registry.populate_built_in_aliases();
402 registry.rebuild_catalog();
403 Arc::new(registry)
404 }
405
/// Returns the built-in extension -> scope additions: the `cjs` and `mjs`
/// extensions are mapped to the JavaScript scope (`source.js`).
pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
    const JS_SCOPE: &str = "source.js";
    const JS_EXTENSIONS: &[&str] = &["cjs", "mjs"];

    JS_EXTENSIONS
        .iter()
        .map(|ext| (ext.to_string(), JS_SCOPE.to_string()))
        .collect()
}
423
/// Returns the built-in exact-filename -> scope table.
///
/// Keys are complete filenames (case-sensitive, e.g. `Dockerfile`, `go.mod`);
/// values are the scope name of the grammar that should highlight them.
pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
    // One row per scope, listing every filename that maps to it.
    const FILENAMES_BY_SCOPE: &[(&str, &[&str])] = &[
        (
            "source.shell.bash",
            &[
                ".zshrc",
                ".zprofile",
                ".zshenv",
                ".zlogin",
                ".zlogout",
                ".bash_aliases",
                "PKGBUILD",
                "APKBUILD",
            ],
        ),
        ("source.git-rebase-todo", &["git-rebase-todo"]),
        (
            "source.git-commit",
            &["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"],
        ),
        (
            "source.gitignore",
            &[".gitignore", ".dockerignore", ".npmignore", ".hgignore"],
        ),
        ("source.gitconfig", &[".gitconfig", ".gitmodules"]),
        ("source.gitattributes", &[".gitattributes"]),
        ("source.groovy", &["Jenkinsfile"]),
        ("source.ruby", &["Brewfile"]),
        (
            "source.dockerfile",
            &[
                "Dockerfile",
                "Containerfile",
                "Dockerfile.dev",
                "Dockerfile.prod",
                "Dockerfile.test",
                "Dockerfile.build",
            ],
        ),
        ("source.cmake", &["CMakeLists.txt"]),
        (
            "source.starlark",
            &[
                "BUILD",
                "BUILD.bazel",
                "WORKSPACE",
                "WORKSPACE.bazel",
                "Tiltfile",
            ],
        ),
        ("source.justfile", &["justfile", "Justfile", ".justfile"]),
        ("source.ini", &[".editorconfig"]),
        ("source.earthfile", &["Earthfile"]),
        (
            "source.hyprlang",
            &["hyprland.conf", "hyprpaper.conf", "hyprlock.conf"],
        ),
        ("source.gomod", &["go.mod", "go.sum"]),
    ];

    let mut map = HashMap::new();
    for (scope, filenames) in FILENAMES_BY_SCOPE {
        for filename in filenames.iter() {
            map.insert(filename.to_string(), scope.to_string());
        }
    }
    map
}
529
530 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
532 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
534 Ok(syntax) => {
535 builder.add(syntax);
536 tracing::debug!("Loaded embedded TOML grammar");
537 }
538 Err(e) => {
539 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
540 }
541 }
542
543 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
545 Ok(syntax) => {
546 builder.add(syntax);
547 tracing::debug!("Loaded embedded Odin grammar");
548 }
549 Err(e) => {
550 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
551 }
552 }
553
554 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
556 Ok(syntax) => {
557 builder.add(syntax);
558 tracing::debug!("Loaded embedded Zig grammar");
559 }
560 Err(e) => {
561 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
562 }
563 }
564
565 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
567 Ok(syntax) => {
568 builder.add(syntax);
569 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
570 }
571 Err(e) => {
572 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
573 }
574 }
575
576 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
578 {
579 Ok(syntax) => {
580 builder.add(syntax);
581 tracing::debug!("Loaded embedded Git Commit Message grammar");
582 }
583 Err(e) => {
584 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
585 }
586 }
587
588 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
590 Ok(syntax) => {
591 builder.add(syntax);
592 tracing::debug!("Loaded embedded Gitignore grammar");
593 }
594 Err(e) => {
595 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
596 }
597 }
598
599 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
601 Ok(syntax) => {
602 builder.add(syntax);
603 tracing::debug!("Loaded embedded Git Config grammar");
604 }
605 Err(e) => {
606 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
607 }
608 }
609
610 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
612 Ok(syntax) => {
613 builder.add(syntax);
614 tracing::debug!("Loaded embedded Git Attributes grammar");
615 }
616 Err(e) => {
617 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
618 }
619 }
620
621 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
623 Ok(syntax) => {
624 builder.add(syntax);
625 tracing::debug!("Loaded embedded Typst grammar");
626 }
627 Err(e) => {
628 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
629 }
630 }
631
632 let additional_grammars: &[(&str, &str)] = &[
634 (DOCKERFILE_GRAMMAR, "Dockerfile"),
635 (INI_GRAMMAR, "INI"),
636 (CMAKE_GRAMMAR, "CMake"),
637 (SCSS_GRAMMAR, "SCSS"),
638 (LESS_GRAMMAR, "LESS"),
639 (POWERSHELL_GRAMMAR, "PowerShell"),
640 (KOTLIN_GRAMMAR, "Kotlin"),
641 (SWIFT_GRAMMAR, "Swift"),
642 (DART_GRAMMAR, "Dart"),
643 (ELIXIR_GRAMMAR, "Elixir"),
644 (FSHARP_GRAMMAR, "FSharp"),
645 (NIX_GRAMMAR, "Nix"),
646 (HCL_GRAMMAR, "HCL"),
647 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
648 (GRAPHQL_GRAMMAR, "GraphQL"),
649 (JULIA_GRAMMAR, "Julia"),
650 (NIM_GRAMMAR, "Nim"),
651 (GLEAM_GRAMMAR, "Gleam"),
652 (VLANG_GRAMMAR, "V"),
653 (SOLIDITY_GRAMMAR, "Solidity"),
654 (KDL_GRAMMAR, "KDL"),
655 (NUSHELL_GRAMMAR, "Nushell"),
656 (STARLARK_GRAMMAR, "Starlark"),
657 (JUSTFILE_GRAMMAR, "Justfile"),
658 (EARTHFILE_GRAMMAR, "Earthfile"),
659 (GOMOD_GRAMMAR, "Go Module"),
660 (VUE_GRAMMAR, "Vue"),
661 (SVELTE_GRAMMAR, "Svelte"),
662 (ASTRO_GRAMMAR, "Astro"),
663 (HYPRLANG_GRAMMAR, "Hyprlang"),
664 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
665 (RACKET_GRAMMAR, "Racket"),
666 (VERILOG_GRAMMAR, "Verilog"),
667 (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
668 (VHDL_GRAMMAR, "VHDL"),
669 (C3_GRAMMAR, "C3"),
670 ];
671
672 for (grammar_str, name) in additional_grammars {
673 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
674 Ok(syntax) => {
675 builder.add(syntax);
676 tracing::debug!("Loaded embedded {} grammar", name);
677 }
678 Err(e) => {
679 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
680 }
681 }
682 }
683 }
684
685 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
691 let entry = self.find_by_path(path, None)?;
692 entry
693 .engines
694 .syntect
695 .map(|i| &self.syntax_set.syntaxes()[i])
696 }
697
698 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
706 if let Some(entry) = self.find_by_name(name) {
707 if let Some(idx) = entry.engines.syntect {
708 return Some(&self.syntax_set.syntaxes()[idx]);
709 }
710 }
711 self.syntax_set.find_syntax_by_name(name)
715 }
716
/// Returns the built-in `(alias, full grammar name)` pairs, in declaration
/// order.
fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
    const BUILT_IN_ALIASES: &[(&str, &str)] = &[
        ("bash", "Bourne Again Shell (bash)"),
        ("shell", "Bourne Again Shell (bash)"),
        ("sh", "Bourne Again Shell (bash)"),
        ("c++", "C++"),
        ("cpp", "C++"),
        ("csharp", "C#"),
        ("objc", "Objective-C"),
        ("objcpp", "Objective-C++"),
        ("regex", "Regular Expressions (Python)"),
        ("regexp", "Regular Expressions (Python)"),
        ("proto", "Protocol Buffers"),
        ("protobuf", "Protocol Buffers"),
        ("gomod", "Go Module"),
        ("git-rebase", "Git Rebase Todo"),
        ("git-commit", "Git Commit Message"),
        ("git-config", "Git Config"),
        ("git-attributes", "Git Attributes"),
        ("gitignore", "Gitignore"),
        ("fsharp", "FSharp"),
        ("f#", "FSharp"),
        ("terraform", "HCL"),
        ("tf", "HCL"),
        ("ts", "TypeScript"),
        ("js", "JavaScript"),
        ("py", "Python"),
        ("rb", "Ruby"),
        ("rs", "Rust"),
        ("md", "Markdown"),
        ("yml", "YAML"),
        ("dockerfile", "Dockerfile"),
    ];
    BUILT_IN_ALIASES.to_vec()
}
761
762 pub(crate) fn populate_built_in_aliases(&mut self) {
769 for (short, full) in Self::built_in_aliases() {
770 self.register_alias_inner(short, full, true);
771 }
772 self.rebuild_catalog();
773 }
774
775 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
785 if !self.register_alias_inner(short_name, full_name, false) {
786 return false;
787 }
788 let short_lower = short_name.to_lowercase();
789 let full_lower = full_name.to_lowercase();
790 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
791 self.catalog_by_name
792 .entry(short_lower.clone())
793 .or_insert(idx);
794 let entry = &mut self.catalog[idx];
795 let replace = match &entry.short_name {
796 None => true,
797 Some(existing) => short_name.len() < existing.len(),
798 };
799 if replace {
800 entry.short_name = Some(short_lower);
801 }
802 }
803 true
804 }
805
806 fn register_alias_inner(
807 &mut self,
808 short_name: &str,
809 full_name: &str,
810 is_built_in: bool,
811 ) -> bool {
812 let short_lower = short_name.to_lowercase();
813
814 let target_exists = self
816 .syntax_set
817 .syntaxes()
818 .iter()
819 .any(|s| s.name.eq_ignore_ascii_case(full_name));
820 if !target_exists {
821 if tree_sitter_for_syntect_name(full_name).is_some() {
825 return false;
826 }
827 if is_built_in {
828 tracing::warn!(
831 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
832 short_name, full_name
833 );
834 } else {
835 tracing::warn!(
836 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
837 short_name,
838 full_name
839 );
840 }
841 return false;
842 }
843
844 let collides_with_full_name = self
846 .syntax_set
847 .syntaxes()
848 .iter()
849 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
850 if collides_with_full_name {
851 tracing::debug!(
855 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
856 short_name
857 );
858 return false;
859 }
860
861 if let Some(existing_target) = self.aliases.get(&short_lower) {
863 if existing_target.eq_ignore_ascii_case(full_name) {
864 return true;
866 }
867 let msg = format!(
868 "Alias '{}' already maps to '{}', cannot remap to '{}'",
869 short_name, existing_target, full_name
870 );
871 if is_built_in {
872 panic!("[grammar-alias] Built-in alias collision: {}", msg);
873 } else {
874 tracing::warn!("[grammar-alias] {}", msg);
875 return false;
876 }
877 }
878
879 let exact_name = self
881 .syntax_set
882 .syntaxes()
883 .iter()
884 .find(|s| s.name.eq_ignore_ascii_case(full_name))
885 .map(|s| s.name.clone())
886 .unwrap();
887
888 self.aliases.insert(short_lower, exact_name);
889 true
890 }
891
892 pub(crate) fn rebuild_catalog(&mut self) {
907 let mut short_by_full: HashMap<String, String> = HashMap::new();
914 let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
915 let key = full.to_lowercase();
916 let keep = match map.get(&key) {
917 None => true,
918 Some(existing) => short.len() < existing.len(),
919 };
920 if keep {
921 map.insert(key, short.to_string());
922 }
923 };
924 for (short, full) in Self::built_in_aliases() {
925 record(&mut short_by_full, short, full);
926 }
927 for (short, full) in &self.aliases {
928 record(&mut short_by_full, short, full);
929 }
930
931 let derive_language_id =
932 |display_name: &str| -> (String, Option<fresh_languages::Language>) {
933 let ts = tree_sitter_for_syntect_name(display_name);
934 let id = ts
935 .map(|l| l.id().to_string())
936 .unwrap_or_else(|| display_name.to_lowercase());
937 (id, ts)
938 };
939
940 let mut catalog: Vec<GrammarEntry> = Vec::new();
941 let mut scope_to_index: HashMap<String, usize> = HashMap::new();
942
943 for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
964 if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
965 continue;
966 }
967 let (language_id, tree_sitter) = derive_language_id(&syntax.name);
968 let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
969 let source = self
970 .grammar_sources
971 .get(&syntax.name)
972 .map(|info| info.source.clone())
973 .unwrap_or(GrammarSource::BuiltIn);
974 let entry_index = catalog.len();
975 scope_to_index.insert(syntax.scope.to_string(), entry_index);
976
977 let mut extensions = syntax.file_extensions.clone();
983 if let Some(lang) = tree_sitter {
984 for ext in lang.extensions() {
985 let ext = ext.to_string();
986 if !extensions.iter().any(|e| e == &ext) {
987 extensions.push(ext);
988 }
989 }
990 }
991
992 catalog.push(GrammarEntry {
993 display_name: syntax.name.clone(),
994 language_id,
995 short_name,
996 extensions,
997 filenames: Vec::new(),
998 filename_globs: Vec::new(),
999 source,
1000 engines: GrammarEngines {
1001 syntect: Some(idx),
1002 tree_sitter,
1003 },
1004 });
1005 }
1006
1007 for (filename, scope) in &self.filename_scopes {
1009 if let Some(&idx) = scope_to_index.get(scope) {
1010 if !catalog[idx].filenames.iter().any(|f| f == filename) {
1011 catalog[idx].filenames.push(filename.clone());
1012 }
1013 }
1014 }
1015
1016 for (ext, scope) in &self.user_extensions {
1018 if let Some(&idx) = scope_to_index.get(scope) {
1019 if !catalog[idx].extensions.iter().any(|e| e == ext) {
1020 catalog[idx].extensions.push(ext.clone());
1021 }
1022 }
1023 }
1024
1025 let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
1030 std::collections::HashSet::new();
1031 for entry in &catalog {
1032 if let Some(lang) = entry.engines.tree_sitter {
1033 ts_covered.insert(lang);
1034 }
1035 }
1036 for lang in fresh_languages::Language::all() {
1037 if ts_covered.contains(lang) {
1038 continue;
1039 }
1040 let display_name = lang.display_name().to_string();
1041 let language_id = lang.id().to_string();
1042 let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
1043 let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
1044 catalog.push(GrammarEntry {
1045 display_name,
1046 language_id,
1047 short_name,
1048 extensions,
1049 filenames: Vec::new(),
1050 filename_globs: Vec::new(),
1051 source: GrammarSource::BuiltIn,
1052 engines: GrammarEngines {
1053 syntect: None,
1054 tree_sitter: Some(*lang),
1055 },
1056 });
1057 }
1058
1059 let mut by_name: HashMap<String, usize> = HashMap::new();
1067 let mut by_extension: HashMap<String, usize> = HashMap::new();
1068 let mut by_filename: HashMap<String, usize> = HashMap::new();
1069 for (idx, entry) in catalog.iter().enumerate() {
1070 by_name.insert(entry.display_name.to_lowercase(), idx);
1071 by_name.insert(entry.language_id.to_lowercase(), idx);
1072 if let Some(short) = &entry.short_name {
1073 by_name.insert(short.to_lowercase(), idx);
1074 }
1075 for ext in &entry.extensions {
1076 by_extension.entry(ext.to_lowercase()).or_insert(idx);
1077 by_filename.entry(ext.clone()).or_insert(idx);
1078 }
1079 for filename in &entry.filenames {
1080 by_filename.entry(filename.clone()).or_insert(idx);
1081 }
1082 }
1083
1084 self.catalog = catalog;
1085 self.catalog_by_name = by_name;
1086 self.catalog_by_extension = by_extension;
1087 self.catalog_by_filename = by_filename;
1088
1089 if !self.applied_language_config.is_empty() {
1093 let cfg = std::mem::take(&mut self.applied_language_config);
1094 self.apply_language_config_inner(&cfg);
1095 self.applied_language_config = cfg;
1096 }
1097 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1098 }
1099
1100 pub fn catalog(&self) -> &[GrammarEntry] {
1102 &self.catalog
1103 }
1104
1105 pub fn catalog_gen(&self) -> u64 {
1109 self.catalog_gen
1110 }
1111
1112 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1118 self.catalog_by_name
1119 .get(&name.to_lowercase())
1120 .map(|&idx| &self.catalog[idx])
1121 }
1122
1123 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1144 let filename = path.file_name().and_then(|n| n.to_str());
1145 let path_str = path.to_str().unwrap_or("");
1146
1147 if let Some(name) = filename {
1148 if let Some(&idx) = self.catalog_by_filename.get(name) {
1149 return Some(&self.catalog[idx]);
1150 }
1151 }
1152
1153 if let Some(name) = filename {
1155 for entry in &self.catalog {
1156 for pattern in &entry.filename_globs {
1157 let matched = if is_path_pattern(pattern) {
1158 path_glob_matches(pattern, path_str)
1159 } else {
1160 filename_glob_matches(pattern, name)
1161 };
1162 if matched {
1163 return Some(entry);
1164 }
1165 }
1166 }
1167 }
1168
1169 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1170 if let Some(entry) = self.find_by_extension(ext) {
1171 return Some(entry);
1172 }
1173 }
1174
1175 let line = first_line?;
1180 let syntax = self.syntax_set.find_syntax_by_first_line(line)?;
1181 self.find_by_name(&syntax.name)
1182 }
1183
1184 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1186 self.catalog_by_extension
1187 .get(&ext.to_lowercase())
1188 .map(|&idx| &self.catalog[idx])
1189 }
1190
1191 pub fn apply_language_config(
1204 &mut self,
1205 languages: &HashMap<String, crate::config::LanguageConfig>,
1206 ) {
1207 self.applied_language_config = languages.clone();
1208 self.apply_language_config_inner(languages);
1209 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1210 }
1211
1212 fn apply_language_config_inner(
1217 &mut self,
1218 languages: &HashMap<String, crate::config::LanguageConfig>,
1219 ) {
1220 for (lang_id, lang_cfg) in languages {
1221 let grammar_name = if lang_cfg.grammar.is_empty() {
1222 lang_id.as_str()
1223 } else {
1224 lang_cfg.grammar.as_str()
1225 };
1226
1227 let idx = self
1229 .catalog_by_name
1230 .get(&grammar_name.to_lowercase())
1231 .copied()
1232 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1233 .unwrap_or_else(|| {
1234 let idx = self.catalog.len();
1235 self.catalog.push(GrammarEntry {
1236 display_name: lang_id.clone(),
1237 language_id: lang_id.clone(),
1238 short_name: None,
1239 extensions: Vec::new(),
1240 filenames: Vec::new(),
1241 filename_globs: Vec::new(),
1242 source: GrammarSource::BuiltIn,
1243 engines: GrammarEngines::default(),
1244 });
1245 idx
1246 });
1247
1248 self.catalog_by_name
1253 .entry(lang_id.to_lowercase())
1254 .or_insert(idx);
1255
1256 for ext in &lang_cfg.extensions {
1257 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1258 self.catalog[idx].extensions.push(ext.clone());
1259 }
1260 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1262 }
1263 for filename in &lang_cfg.filenames {
1264 if is_glob_pattern(filename) {
1265 if !self.catalog[idx]
1266 .filename_globs
1267 .iter()
1268 .any(|f| f == filename)
1269 {
1270 self.catalog[idx].filename_globs.push(filename.clone());
1271 }
1272 } else {
1273 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1274 self.catalog[idx].filenames.push(filename.clone());
1275 }
1276 self.catalog_by_filename.insert(filename.clone(), idx);
1277 }
1278 }
1279 }
1280 }
1281
1282 pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1284 &self.syntax_set
1285 }
1286
1287 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1289 Arc::clone(&self.syntax_set)
1290 }
1291
1292 pub fn available_syntaxes(&self) -> Vec<&str> {
1294 self.syntax_set
1295 .syntaxes()
1296 .iter()
1297 .map(|s| s.name.as_str())
1298 .collect()
1299 }
1300
1301 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1308 let mut result: Vec<GrammarInfo> = self
1309 .catalog
1310 .iter()
1311 .map(|entry| GrammarInfo {
1312 name: entry.display_name.clone(),
1313 source: entry.source.clone(),
1314 file_extensions: entry.extensions.clone(),
1315 short_name: entry.short_name.clone(),
1316 })
1317 .collect();
1318 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1319 result
1320 }
1321
1322 pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1324 &self.grammar_sources
1325 }
1326
1327 pub(crate) fn build_grammar_sources_from_syntax_set(
1331 syntax_set: &SyntaxSet,
1332 ) -> HashMap<String, GrammarInfo> {
1333 let mut sources = HashMap::new();
1334 for syntax in syntax_set.syntaxes() {
1335 sources.insert(
1336 syntax.name.clone(),
1337 GrammarInfo {
1338 name: syntax.name.clone(),
1339 source: GrammarSource::BuiltIn,
1340 file_extensions: syntax.file_extensions.clone(),
1341 short_name: None,
1342 },
1343 );
1344 }
1345 sources
1346 }
1347
    /// Test-only accessor for the extension map.
    ///
    /// `with_additional_grammars` fills this map with extension -> scope string
    /// pairs.
    #[cfg(test)]
    pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
        &self.user_extensions
    }
1353
    /// Test-only accessor for the specs of grammars that were loaded from files.
    #[cfg(test)]
    pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
        &self.loaded_grammar_paths
    }
1359
1360 pub fn with_additional_grammars(
1374 base: &GrammarRegistry,
1375 additional: &[GrammarSpec],
1376 ) -> Option<Self> {
1377 tracing::info!(
1378 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1379 additional.len(),
1380 base.syntax_set.syntaxes().len()
1381 );
1382
1383 let mut builder = (*base.syntax_set).clone().into_builder();
1387
1388 let mut user_extensions = base.user_extensions.clone();
1390
1391 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1393
1394 let mut grammar_sources = base.grammar_sources.clone();
1396
1397 for spec in additional {
1399 tracing::info!(
1400 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1401 spec.language,
1402 spec.path,
1403 spec.extensions
1404 );
1405 match Self::load_grammar_file(&spec.path) {
1406 Ok(syntax) => {
1407 let scope = syntax.scope.to_string();
1408 let syntax_name = syntax.name.clone();
1409 tracing::info!(
1410 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1411 syntax_name,
1412 scope
1413 );
1414 builder.add(syntax);
1415 tracing::info!(
1416 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1417 spec.language,
1418 spec.path,
1419 spec.extensions
1420 );
1421 for ext in &spec.extensions {
1423 user_extensions.insert(ext.clone(), scope.clone());
1424 }
1425 grammar_sources.insert(
1427 syntax_name.clone(),
1428 GrammarInfo {
1429 name: syntax_name,
1430 source: GrammarSource::Plugin {
1431 plugin: spec.language.clone(),
1432 path: spec.path.clone(),
1433 },
1434 file_extensions: spec.extensions.clone(),
1435 short_name: None,
1436 },
1437 );
1438 loaded_grammar_paths.push(spec.clone());
1440 }
1441 Err(e) => {
1442 tracing::warn!(
1443 "Failed to load grammar for '{}' from {:?}: {}",
1444 spec.language,
1445 spec.path,
1446 e
1447 );
1448 }
1449 }
1450 }
1451
1452 let mut reg = Self {
1453 syntax_set: Arc::new(builder.build()),
1454 user_extensions,
1455 filename_scopes: base.filename_scopes.clone(),
1456 loaded_grammar_paths,
1457 grammar_sources,
1458 aliases: base.aliases.clone(),
1459 catalog: Vec::new(),
1460 catalog_by_name: HashMap::new(),
1461 catalog_by_extension: HashMap::new(),
1462 catalog_by_filename: HashMap::new(),
1463 applied_language_config: HashMap::new(),
1464 catalog_gen: 0,
1465 };
1466 reg.rebuild_catalog();
1467 Some(reg)
1468 }
1469
1470 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1476 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1477
1478 match ext {
1479 "sublime-syntax" => {
1480 let content = std::fs::read_to_string(path)
1481 .map_err(|e| format!("Failed to read file: {}", e))?;
1482 SyntaxDefinition::load_from_str(
1483 &content,
1484 true,
1485 path.file_stem().and_then(|s| s.to_str()),
1486 )
1487 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1488 }
1489 _ => Err(format!(
1490 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1491 ext
1492 )),
1493 }
1494 }
1495}
1496
1497impl Default for GrammarRegistry {
1498 fn default() -> Self {
1499 let defaults = SyntaxSet::load_defaults_newlines();
1501 let mut builder = defaults.into_builder();
1502 Self::add_embedded_grammars(&mut builder);
1503 let syntax_set = builder.build();
1504 let filename_scopes = Self::build_filename_scopes();
1505 let extra_extensions = Self::build_extra_extensions();
1506
1507 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1508 registry.populate_built_in_aliases();
1509 registry.rebuild_catalog();
1510 registry
1511 }
1512}
1513
/// Deserialized view of a package manifest; only the `contributes` section is
/// modeled here.
///
/// NOTE(review): the field names look like the VS Code extension
/// `package.json` layout — confirm against the loader that reads this type.
#[derive(Debug, Deserialize)]
pub struct PackageManifest {
    /// Contributions declared by the package; `None` when the key is absent.
    #[serde(default)]
    pub contributes: Option<Contributes>,
}
1521
/// The `contributes` section of a [`PackageManifest`].
///
/// Both lists default to empty when their key is missing.
#[derive(Debug, Deserialize, Default)]
pub struct Contributes {
    /// Language declarations (id plus file extensions).
    #[serde(default)]
    pub languages: Vec<LanguageContribution>,
    /// Grammar declarations (language, scope name, grammar path).
    #[serde(default)]
    pub grammars: Vec<GrammarContribution>,
}
1529
/// One language entry from a manifest's `contributes.languages` list.
#[derive(Debug, Deserialize)]
pub struct LanguageContribution {
    /// Language identifier (required).
    pub id: String,
    /// File extensions declared for the language; empty when omitted.
    /// NOTE(review): whether entries include a leading dot is not visible here.
    #[serde(default)]
    pub extensions: Vec<String>,
}
1536
/// One grammar entry from a manifest's `contributes.grammars` list.
#[derive(Debug, Deserialize)]
pub struct GrammarContribution {
    /// Language the grammar applies to.
    /// NOTE(review): presumably matches a [`LanguageContribution::id`] — confirm.
    pub language: String,
    /// Scope name of the grammar; read from the `scopeName` key.
    #[serde(rename = "scopeName")]
    pub scope_name: String,
    /// Path to the grammar file as written in the manifest.
    /// NOTE(review): likely relative to the manifest's directory — confirm.
    pub path: String,
}
1544
1545#[cfg(test)]
1546mod tests {
1547 use super::*;
1548
1549 #[test]
1550 fn test_empty_registry() {
1551 let registry = GrammarRegistry::empty();
1552 assert!(!registry.available_syntaxes().is_empty());
1554 }
1555
1556 #[test]
1557 fn test_default_registry() {
1558 let registry = GrammarRegistry::default();
1559 assert!(!registry.available_syntaxes().is_empty());
1561 }
1562
1563 #[test]
1564 fn test_find_syntax_for_common_extensions() {
1565 let registry = GrammarRegistry::default();
1566
1567 let test_cases = [
1574 ("test.py", true),
1575 ("test.rs", true),
1576 ("test.js", false),
1577 ("test.json", true),
1578 ("test.md", true),
1579 ("test.html", true),
1580 ("test.css", true),
1581 ("test.unknown_extension_xyz", false),
1582 ];
1583
1584 for (filename, should_exist) in test_cases {
1585 let path = Path::new(filename);
1586 let result = registry.find_syntax_for_file(path);
1587 assert_eq!(
1588 result.is_some(),
1589 should_exist,
1590 "Expected {:?} for {}",
1591 should_exist,
1592 filename
1593 );
1594 }
1595 }
1596
1597 #[test]
1598 fn test_racket_grammar_loaded() {
1599 let registry = GrammarRegistry::default();
1600 for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1601 let result = registry.find_syntax_for_file(Path::new(filename));
1602 assert!(
1603 result.is_some(),
1604 "Racket grammar should be available for {}",
1605 filename
1606 );
1607 let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1608 assert_eq!(entry.display_name, "Racket", "for {}", filename);
1609 }
1610 }
1611
1612 #[test]
1613 fn test_syntax_set_arc() {
1614 let registry = GrammarRegistry::default();
1615 let arc1 = registry.syntax_set_arc();
1616 let arc2 = registry.syntax_set_arc();
1617 assert!(Arc::ptr_eq(&arc1, &arc2));
1619 }
1620
1621 #[test]
1622 fn test_shell_dotfiles_detection() {
1623 let registry = GrammarRegistry::default();
1624
1625 let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1627
1628 for filename in shell_files {
1629 let path = Path::new(filename);
1630 let result = registry.find_syntax_for_file(path);
1631 assert!(
1632 result.is_some(),
1633 "{} should be detected as a syntax",
1634 filename
1635 );
1636 let syntax = result.unwrap();
1637 assert!(
1639 syntax.name.to_lowercase().contains("bash")
1640 || syntax.name.to_lowercase().contains("shell"),
1641 "{} should be detected as shell/bash, got: {}",
1642 filename,
1643 syntax.name
1644 );
1645 }
1646 }
1647
1648 #[test]
1649 fn test_pkgbuild_detection() {
1650 let registry = GrammarRegistry::default();
1651
1652 for filename in ["PKGBUILD", "APKBUILD"] {
1654 let path = Path::new(filename);
1655 let result = registry.find_syntax_for_file(path);
1656 assert!(
1657 result.is_some(),
1658 "{} should be detected as a syntax",
1659 filename
1660 );
1661 let syntax = result.unwrap();
1662 assert!(
1664 syntax.name.to_lowercase().contains("bash")
1665 || syntax.name.to_lowercase().contains("shell"),
1666 "{} should be detected as shell/bash, got: {}",
1667 filename,
1668 syntax.name
1669 );
1670 }
1671 }
1672
1673 #[test]
1674 fn test_find_syntax_with_glob_filenames() {
1675 let mut registry = GrammarRegistry::default();
1676 let mut languages = std::collections::HashMap::new();
1677 languages.insert(
1678 "shell-configs".to_string(),
1679 crate::config::LanguageConfig {
1680 extensions: vec!["sh".to_string()],
1681 filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1682 grammar: "bash".to_string(),
1683 comment_prefix: Some("#".to_string()),
1684 auto_indent: true,
1685 auto_close: None,
1686 auto_surround: None,
1687 textmate_grammar: None,
1688 show_whitespace_tabs: true,
1689 line_wrap: None,
1690 wrap_column: None,
1691 page_view: None,
1692 page_width: None,
1693 use_tabs: None,
1694 tab_size: None,
1695 formatter: None,
1696 format_on_save: false,
1697 on_save: vec![],
1698 word_characters: None,
1699 },
1700 );
1701 registry.apply_language_config(&languages);
1702
1703 assert!(
1704 registry
1705 .find_by_path(Path::new("nftables.conf"), None)
1706 .is_some(),
1707 "*.conf should match nftables.conf"
1708 );
1709 assert!(
1710 registry.find_by_path(Path::new("lfrc"), None).is_some(),
1711 "*rc should match lfrc"
1712 );
1713 let _ = registry.find_by_path(Path::new("randomfile"), None);
1715 }
1716
1717 #[test]
1718 fn test_find_syntax_with_path_glob_filenames() {
1719 let mut registry = GrammarRegistry::default();
1720 let mut languages = std::collections::HashMap::new();
1721 languages.insert(
1722 "shell-configs".to_string(),
1723 crate::config::LanguageConfig {
1724 extensions: vec!["sh".to_string()],
1725 filenames: vec!["/etc/**/rc.*".to_string()],
1726 grammar: "bash".to_string(),
1727 comment_prefix: Some("#".to_string()),
1728 auto_indent: true,
1729 auto_close: None,
1730 auto_surround: None,
1731 textmate_grammar: None,
1732 show_whitespace_tabs: true,
1733 line_wrap: None,
1734 wrap_column: None,
1735 page_view: None,
1736 page_width: None,
1737 use_tabs: None,
1738 tab_size: None,
1739 formatter: None,
1740 format_on_save: false,
1741 on_save: vec![],
1742 word_characters: None,
1743 },
1744 );
1745 registry.apply_language_config(&languages);
1746
1747 assert!(
1748 registry
1749 .find_by_path(Path::new("/etc/rc.conf"), None)
1750 .is_some(),
1751 "/etc/**/rc.* should match /etc/rc.conf"
1752 );
1753 assert!(
1754 registry
1755 .find_by_path(Path::new("/etc/init/rc.local"), None)
1756 .is_some(),
1757 "/etc/**/rc.* should match /etc/init/rc.local"
1758 );
1759 let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1760 }
1761
1762 #[test]
1763 fn test_exact_filename_takes_priority_over_glob() {
1764 let mut registry = GrammarRegistry::default();
1765 let mut languages = std::collections::HashMap::new();
1766
1767 languages.insert(
1769 "custom-lfrc".to_string(),
1770 crate::config::LanguageConfig {
1771 extensions: vec![],
1772 filenames: vec!["lfrc".to_string()],
1773 grammar: "python".to_string(),
1774 comment_prefix: Some("#".to_string()),
1775 auto_indent: true,
1776 auto_close: None,
1777 auto_surround: None,
1778 textmate_grammar: None,
1779 show_whitespace_tabs: true,
1780 line_wrap: None,
1781 wrap_column: None,
1782 page_view: None,
1783 page_width: None,
1784 use_tabs: None,
1785 tab_size: None,
1786 formatter: None,
1787 format_on_save: false,
1788 on_save: vec![],
1789 word_characters: None,
1790 },
1791 );
1792
1793 languages.insert(
1795 "rc-files".to_string(),
1796 crate::config::LanguageConfig {
1797 extensions: vec![],
1798 filenames: vec!["*rc".to_string()],
1799 grammar: "bash".to_string(),
1800 comment_prefix: Some("#".to_string()),
1801 auto_indent: true,
1802 auto_close: None,
1803 auto_surround: None,
1804 textmate_grammar: None,
1805 show_whitespace_tabs: true,
1806 line_wrap: None,
1807 wrap_column: None,
1808 page_view: None,
1809 page_width: None,
1810 use_tabs: None,
1811 tab_size: None,
1812 formatter: None,
1813 format_on_save: false,
1814 on_save: vec![],
1815 word_characters: None,
1816 },
1817 );
1818
1819 registry.apply_language_config(&languages);
1820
1821 let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1823 assert!(
1824 entry.display_name.to_lowercase().contains("python"),
1825 "exact match should win over glob, got: {}",
1826 entry.display_name
1827 );
1828 }
1829
1830 #[test]
1831 fn test_built_in_aliases_resolve() {
1832 let registry = GrammarRegistry::default();
1833
1834 let syntax = registry.find_syntax_by_name("bash");
1836 assert!(syntax.is_some(), "alias 'bash' should resolve");
1837 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1838
1839 let syntax = registry.find_syntax_by_name("cpp");
1841 assert!(syntax.is_some(), "alias 'cpp' should resolve");
1842 assert_eq!(syntax.unwrap().name, "C++");
1843
1844 let syntax = registry.find_syntax_by_name("csharp");
1846 assert!(syntax.is_some(), "alias 'csharp' should resolve");
1847 assert_eq!(syntax.unwrap().name, "C#");
1848
1849 let syntax = registry.find_syntax_by_name("sh");
1851 assert!(syntax.is_some(), "alias 'sh' should resolve");
1852 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1853
1854 let syntax = registry.find_syntax_by_name("proto");
1856 assert!(syntax.is_some(), "alias 'proto' should resolve");
1857 assert_eq!(syntax.unwrap().name, "Protocol Buffers");
1858 }
1859
1860 #[test]
1861 fn test_alias_case_insensitive_input() {
1862 let registry = GrammarRegistry::default();
1863
1864 let syntax = registry.find_syntax_by_name("BASH");
1866 assert!(
1867 syntax.is_some(),
1868 "alias 'BASH' should resolve case-insensitively"
1869 );
1870 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1871
1872 let syntax = registry.find_syntax_by_name("Cpp");
1873 assert!(
1874 syntax.is_some(),
1875 "alias 'Cpp' should resolve case-insensitively"
1876 );
1877 assert_eq!(syntax.unwrap().name, "C++");
1878 }
1879
1880 #[test]
1881 fn test_full_name_still_works() {
1882 let registry = GrammarRegistry::default();
1883
1884 let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
1886 assert!(syntax.is_some(), "full name should still resolve");
1887 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1888
1889 let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
1891 assert!(
1892 syntax.is_some(),
1893 "case-insensitive full name should resolve"
1894 );
1895 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1896 }
1897
1898 #[test]
1899 fn test_alias_does_not_shadow_full_names() {
1900 let registry = GrammarRegistry::default();
1901
1902 let syntax = registry.find_syntax_by_name("rust");
1904 assert!(syntax.is_some());
1905 assert_eq!(syntax.unwrap().name, "Rust");
1906
1907 let syntax = registry.find_syntax_by_name("go");
1909 assert!(syntax.is_some());
1910 assert_eq!(syntax.unwrap().name, "Go");
1911 }
1912
1913 #[test]
1914 fn test_register_alias_rejects_collision() {
1915 let mut registry = GrammarRegistry::default();
1916
1917 assert!(registry.register_alias("myalias", "Rust"));
1919 assert!(!registry.register_alias("myalias", "Go"));
1920
1921 assert!(registry.register_alias("myalias", "Rust"));
1923 }
1924
1925 #[test]
1926 fn test_register_alias_rejects_nonexistent_target() {
1927 let mut registry = GrammarRegistry::default();
1928 assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
1929 }
1930
1931 #[test]
1932 fn test_register_alias_skips_existing_grammar_name() {
1933 let mut registry = GrammarRegistry::default();
1934
1935 assert!(!registry.register_alias("rust", "Rust"));
1937 assert!(registry.find_syntax_by_name("rust").is_some());
1939 }
1940
1941 #[test]
1942 fn test_available_grammar_info_includes_short_names() {
1943 let registry = GrammarRegistry::default();
1944 let infos = registry.available_grammar_info();
1945
1946 let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
1947 assert!(bash_info.is_some(), "bash grammar should be in the list");
1948 let bash_info = bash_info.unwrap();
1949 assert!(
1950 bash_info.short_name.is_some(),
1951 "bash grammar should have a short_name"
1952 );
1953 assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
1955 }
1956
1957 #[test]
1958 fn test_catalog_contains_each_language_once() {
1959 let registry = GrammarRegistry::default();
1960 let catalog = registry.catalog();
1961
1962 let mut seen = std::collections::HashSet::new();
1964 for entry in catalog {
1965 let key = entry.display_name.to_lowercase();
1966 assert!(
1967 seen.insert(key.clone()),
1968 "duplicate catalog entry for display_name={:?}",
1969 entry.display_name
1970 );
1971 }
1972
1973 let ts = registry
1976 .find_by_name("TypeScript")
1977 .expect("TypeScript must be in the catalog");
1978 assert!(ts.engines.syntect.is_none());
1979 assert_eq!(
1980 ts.engines.tree_sitter,
1981 Some(fresh_languages::Language::TypeScript)
1982 );
1983 assert_eq!(ts.language_id, "typescript");
1984 assert!(ts.extensions.iter().any(|e| e == "ts"));
1985
1986 for name in ["Rust", "Python"] {
1989 let entry = registry
1990 .find_by_name(name)
1991 .unwrap_or_else(|| panic!("{} must be in the catalog", name));
1992 assert!(
1993 entry.engines.syntect.is_some(),
1994 "{} should have a syntect index",
1995 name
1996 );
1997 assert!(
1998 entry.engines.tree_sitter.is_some(),
1999 "{} should also have a tree-sitter language",
2000 name
2001 );
2002 let by_id = registry
2005 .find_by_name(&entry.language_id)
2006 .expect("language_id should resolve");
2007 assert_eq!(by_id.display_name, entry.display_name);
2008 }
2009
2010 let js = registry
2016 .find_by_name("JavaScript")
2017 .expect("JavaScript must be in the catalog");
2018 assert!(
2019 js.engines.syntect.is_none(),
2020 "JavaScript must not be routed to the syntect engine (issue #899)"
2021 );
2022 assert_eq!(
2023 js.engines.tree_sitter,
2024 Some(fresh_languages::Language::JavaScript),
2025 "JavaScript must carry the tree-sitter language"
2026 );
2027 }
2028
2029 #[test]
2030 fn test_catalog_find_by_path_and_extension() {
2031 let registry = GrammarRegistry::default();
2032 let ts = registry
2033 .find_by_path(Path::new("foo.ts"), None)
2034 .expect("foo.ts should resolve");
2035 assert_eq!(ts.display_name, "TypeScript");
2036 let rs = registry.find_by_extension("rs").expect("rs should resolve");
2037 assert_eq!(rs.display_name, "Rust");
2038 }
2039
2040 fn lang_cfg(
2042 grammar: &str,
2043 extensions: &[&str],
2044 filenames: &[&str],
2045 ) -> crate::config::LanguageConfig {
2046 crate::config::LanguageConfig {
2047 extensions: extensions.iter().map(|s| s.to_string()).collect(),
2048 filenames: filenames.iter().map(|s| s.to_string()).collect(),
2049 grammar: grammar.to_string(),
2050 comment_prefix: None,
2051 auto_indent: true,
2052 auto_close: None,
2053 auto_surround: None,
2054 textmate_grammar: None,
2055 show_whitespace_tabs: true,
2056 line_wrap: None,
2057 wrap_column: None,
2058 page_view: None,
2059 page_width: None,
2060 use_tabs: None,
2061 tab_size: None,
2062 formatter: None,
2063 format_on_save: false,
2064 on_save: vec![],
2065 word_characters: None,
2066 }
2067 }
2068
2069 #[test]
2073 fn test_user_alias_resolves_via_find_by_name() {
2074 let mut registry = GrammarRegistry::default();
2075 let mut languages = std::collections::HashMap::new();
2076 languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2077 registry.apply_language_config(&languages);
2078
2079 let entry = registry
2080 .find_by_name("mylang")
2081 .expect("user-declared alias 'mylang' must resolve");
2082 assert_eq!(entry.display_name, "Rust");
2083 }
2084
2085 #[test]
2089 fn test_register_alias_preserves_applied_language_config() {
2090 let mut registry = GrammarRegistry::default();
2091 let mut languages = std::collections::HashMap::new();
2092 languages.insert(
2093 "shell-configs".to_string(),
2094 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2095 );
2096 registry.apply_language_config(&languages);
2097
2098 assert!(registry.find_by_extension("myconf").is_some());
2100 assert!(
2101 registry
2102 .find_by_path(Path::new("foo.myconf"), None)
2103 .is_some(),
2104 "glob should match before register_alias"
2105 );
2106
2107 registry.register_alias("mycustom", "Rust");
2109
2110 assert!(
2111 registry.find_by_extension("myconf").is_some(),
2112 "config extension must survive register_alias"
2113 );
2114 assert!(
2115 registry
2116 .find_by_path(Path::new("foo.myconf"), None)
2117 .is_some(),
2118 "glob must survive register_alias"
2119 );
2120 }
2121
2122 #[test]
2126 fn test_from_syntax_name_preserves_canonical_display_name() {
2127 use crate::primitives::detected_language::DetectedLanguage;
2128 let registry = GrammarRegistry::default();
2129 let languages = std::collections::HashMap::new();
2130
2131 let detected = DetectedLanguage::from_syntax_name("BASH", ®istry, &languages)
2132 .expect("BASH should resolve via alias");
2133 assert_eq!(
2134 detected.display_name, "Bourne Again Shell (bash)",
2135 "display_name must be canonical, not user-typed"
2136 );
2137 }
2138
2139 #[test]
2143 fn test_config_only_language_appears_in_catalog() {
2144 let mut registry = GrammarRegistry::default();
2145 let mut languages = std::collections::HashMap::new();
2146 languages.insert("fish".to_string(), lang_cfg("fish", &["fish"], &[]));
2148 registry.apply_language_config(&languages);
2149
2150 let entry = registry
2151 .find_by_name("fish")
2152 .expect("fish should be in the catalog after apply_language_config");
2153 assert!(entry.engines.syntect.is_none());
2154 assert!(entry.engines.tree_sitter.is_none());
2155 assert_eq!(entry.language_id, "fish");
2156 assert!(entry.extensions.iter().any(|e| e == "fish"));
2157 }
2158
2159 #[test]
2164 fn test_config_extension_overrides_builtin() {
2165 let mut registry = GrammarRegistry::default();
2166 assert_eq!(
2168 registry.find_by_extension("js").unwrap().display_name,
2169 "JavaScript"
2170 );
2171
2172 let mut languages = std::collections::HashMap::new();
2173 languages.insert(
2174 "ts-overlay".to_string(),
2175 lang_cfg("TypeScript", &["js"], &[]),
2176 );
2177 registry.apply_language_config(&languages);
2178
2179 assert_eq!(
2180 registry.find_by_extension("js").unwrap().display_name,
2181 "TypeScript",
2182 "user-config extension must win over built-in"
2183 );
2184 }
2185
2186 #[test]
2193 fn test_bare_filename_resolves_via_find_by_path() {
2194 let registry = GrammarRegistry::default();
2195 for (filename, expected_substr) in [
2196 ("Gemfile", "ruby"),
2197 ("Rakefile", "ruby"),
2198 ("Vagrantfile", "ruby"),
2199 ("Makefile", "makefile"),
2200 ("GNUmakefile", "makefile"),
2201 ] {
2202 let entry = registry
2203 .find_by_path(Path::new(filename), None)
2204 .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2205 assert!(
2206 entry.display_name.to_lowercase().contains(expected_substr),
2207 "{} should resolve to {} grammar, got {}",
2208 filename,
2209 expected_substr,
2210 entry.display_name
2211 );
2212 }
2213 }
2214
2215 #[test]
2220 fn test_jsx_resolves_to_javascript() {
2221 let registry = GrammarRegistry::default();
2222 let entry = registry
2223 .find_by_path(Path::new("foo.jsx"), None)
2224 .expect("foo.jsx must resolve");
2225 assert_eq!(entry.display_name, "JavaScript");
2226 }
2227
2228 #[test]
2233 fn test_rebuild_catalog_replays_language_config() {
2234 let mut registry = GrammarRegistry::default();
2235 let mut languages = std::collections::HashMap::new();
2236 languages.insert(
2237 "myshell".to_string(),
2238 lang_cfg("bash", &["myext"], &["*.myglob"]),
2239 );
2240 registry.apply_language_config(&languages);
2241 assert!(registry.find_by_extension("myext").is_some());
2242 assert!(registry
2243 .find_by_path(Path::new("foo.myglob"), None)
2244 .is_some());
2245
2246 registry.rebuild_catalog();
2249 assert!(
2250 registry.find_by_extension("myext").is_some(),
2251 "rebuild_catalog must replay applied user config"
2252 );
2253 assert!(
2254 registry
2255 .find_by_path(Path::new("foo.myglob"), None)
2256 .is_some(),
2257 "rebuild_catalog must replay user globs"
2258 );
2259 }
2260
2261 #[test]
2264 fn test_apply_language_config_idempotent() {
2265 let mut registry = GrammarRegistry::default();
2266 let mut languages = std::collections::HashMap::new();
2267 languages.insert(
2268 "shell-cfg".to_string(),
2269 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2270 );
2271
2272 registry.apply_language_config(&languages);
2273 let first_extensions = registry
2274 .find_by_name("bash")
2275 .unwrap()
2276 .extensions
2277 .iter()
2278 .filter(|e| e == &"myconf")
2279 .count();
2280 let first_globs = registry
2281 .find_by_name("bash")
2282 .unwrap()
2283 .filename_globs
2284 .iter()
2285 .filter(|g| g == &"*.myconf")
2286 .count();
2287 assert_eq!(first_extensions, 1);
2288 assert_eq!(first_globs, 1);
2289
2290 registry.apply_language_config(&languages);
2292 let second_extensions = registry
2293 .find_by_name("bash")
2294 .unwrap()
2295 .extensions
2296 .iter()
2297 .filter(|e| e == &"myconf")
2298 .count();
2299 let second_globs = registry
2300 .find_by_name("bash")
2301 .unwrap()
2302 .filename_globs
2303 .iter()
2304 .filter(|g| g == &"*.myconf")
2305 .count();
2306 assert_eq!(second_extensions, 1, "extensions must not duplicate");
2307 assert_eq!(second_globs, 1, "globs must not duplicate");
2308 }
2309
    /// Parses three Julia lines, the first ending in an adjoint `'`, and checks
    /// that the `function` keyword on the next line is not inside any
    /// `string.*` scope.
    #[test]
    fn test_julia_adjoint_does_not_start_string() {
        use syntect::parsing::{ParseState, ScopeStack};

        let registry = GrammarRegistry::default();
        let syntax_set = registry.syntax_set();
        let syntax = registry
            .find_syntax_by_name("Julia")
            .expect("Julia grammar must be loaded");
        // Parser state and scope stack carry over from one line to the next.
        let mut state = ParseState::new(syntax);
        let mut scopes = ScopeStack::new();

        let lines = ["x = A'\n", "function foo()\n", "end\n"];
        // Set when `function` is seen while a string scope is active (the failure case).
        let mut keyword_line_in_string = false;
        // Set when `function` is seen outside any string scope (proves the check ran).
        let mut found_function_keyword = false;

        for line in &lines {
            let ops = state.parse_line(line, syntax_set).unwrap();
            let mut op_iter = ops.iter().peekable();
            for (byte_idx, _) in line.char_indices() {
                // Apply every scope operation at or before this byte offset so
                // `scopes` reflects the state at `byte_idx`.
                while let Some((offset, op)) = op_iter.peek() {
                    if *offset <= byte_idx {
                        scopes.apply(op).unwrap();
                        op_iter.next();
                    } else {
                        break;
                    }
                }
                let in_string = scopes
                    .as_slice()
                    .iter()
                    .any(|s| s.build_string().starts_with("string."));
                let is_function_kw = line[byte_idx..].starts_with("function");
                if is_function_kw && in_string {
                    keyword_line_in_string = true;
                }
                if is_function_kw && !in_string {
                    found_function_keyword = true;
                }
            }
            // Apply the operations not consumed above so the stack is complete
            // before the next line.
            for (_, op) in op_iter {
                scopes.apply(op).unwrap();
            }
        }

        assert!(
            !keyword_line_in_string,
            "the `function` keyword after an adjoint operator must not be inside a string scope"
        );
        assert!(
            found_function_keyword,
            "test harness must have reached the `function` keyword"
        );
    }
2372
    /// Parses `x = 'a'` with the Julia grammar and checks that the character
    /// between the quotes carries a `constant.*` or `string.*` scope.
    #[test]
    fn test_julia_char_literal_is_recognized() {
        use syntect::parsing::{ParseState, ScopeStack};

        let registry = GrammarRegistry::default();
        let syntax_set = registry.syntax_set();
        let syntax = registry
            .find_syntax_by_name("Julia")
            .expect("Julia grammar must be loaded");
        let mut state = ParseState::new(syntax);
        let mut scopes = ScopeStack::new();

        let line = "x = 'a'\n";
        let ops = state.parse_line(line, syntax_set).unwrap();
        let mut saw_constant_or_string_at_quote = false;
        let mut op_iter = ops.iter().peekable();
        for (byte_idx, _) in line.char_indices() {
            // Apply every scope operation at or before this byte offset so
            // `scopes` reflects the state at `byte_idx`.
            while let Some((offset, op)) = op_iter.peek() {
                if *offset <= byte_idx {
                    scopes.apply(op).unwrap();
                    op_iter.next();
                } else {
                    break;
                }
            }
            // Byte 5 is the `a` between the two quotes in `x = 'a'`.
            if byte_idx == 5 {
                let scoped = scopes.as_slice().iter().any(|s| {
                    let str = s.build_string();
                    str.starts_with("constant.") || str.starts_with("string.")
                });
                if scoped {
                    saw_constant_or_string_at_quote = true;
                }
            }
        }
        assert!(
            saw_constant_or_string_at_quote,
            "char literal 'a' must receive a constant/string scope"
        );
    }
2416
2417 #[test]
2421 fn test_tree_sitter_bridge() {
2422 assert_eq!(
2423 tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2424 Some(fresh_languages::Language::Bash)
2425 );
2426 assert_eq!(
2427 tree_sitter_for_syntect_name("Rust"),
2428 Some(fresh_languages::Language::Rust)
2429 );
2430 assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2432 assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2434 }
2435}