1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use syntect::parsing::{SyntaxDefinition, SyntaxReference, SyntaxSet, SyntaxSetBuilder};
11
12pub use crate::primitives::glob_match::{
14 filename_glob_matches, is_glob_pattern, is_path_pattern, path_glob_matches,
15};
16
/// Describes a single grammar: a language name, a filesystem path and the
/// file extensions associated with it.
///
/// `GrammarRegistry` stores a list of these in its `loaded_grammar_paths` field.
#[derive(Clone, Debug)]
pub struct GrammarSpec {
    /// Name of the language this grammar is for.
    pub language: String,
    /// Path associated with the grammar (presumably the grammar file itself —
    /// confirm against the code that constructs `GrammarSpec`).
    pub path: PathBuf,
    /// File extensions associated with this grammar.
    pub extensions: Vec<String>,
}
30
/// Where a grammar came from.
///
/// Serialized by serde as an internally tagged enum: the variant is written to
/// a `"type"` field using the kebab-case names given in the `rename`
/// attributes below, next to the variant's own fields.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type")]
pub enum GrammarSource {
    /// Grammar that ships with the application (`"built-in"`).
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Grammar identified only by a path (`"user"`).
    #[serde(rename = "user")]
    User { path: PathBuf },
    /// Grammar belonging to a named language pack (`"language-pack"`).
    #[serde(rename = "language-pack")]
    LanguagePack { name: String, path: PathBuf },
    /// Grammar belonging to a named bundle (`"bundle"`).
    #[serde(rename = "bundle")]
    Bundle { name: String, path: PathBuf },
    /// Grammar contributed by the named plugin (`"plugin"`).
    #[serde(rename = "plugin")]
    Plugin { plugin: String, path: PathBuf },
}
51
52impl std::fmt::Display for GrammarSource {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 GrammarSource::BuiltIn => write!(f, "built-in"),
56 GrammarSource::User { path } => write!(f, "user ({})", path.display()),
57 GrammarSource::LanguagePack { name, .. } => write!(f, "language-pack ({})", name),
58 GrammarSource::Bundle { name, .. } => write!(f, "bundle ({})", name),
59 GrammarSource::Plugin { plugin, .. } => write!(f, "plugin ({})", plugin),
60 }
61 }
62}
63
/// Serializable summary of one grammar: its name, origin, extensions and
/// optional short alias.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrammarInfo {
    /// Grammar name.
    pub name: String,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// File extensions associated with the grammar.
    pub file_extensions: Vec<String>,
    /// Optional short alias. Defaults to `None` when missing from the input
    /// and is left out of the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_name: Option<String>,
}
77
/// Explicit syntect-display-name → tree-sitter-language pairs.
///
/// `tree_sitter_for_syntect_name` consults this table first and only then
/// falls back to comparing against each language's `display_name()`.
const SYNTECT_TO_TREE_SITTER_ALIASES: &[(&str, fresh_languages::Language)] =
    &[("Bourne Again Shell (bash)", fresh_languages::Language::Bash)];
87
88fn tree_sitter_for_syntect_name(display_name: &str) -> Option<fresh_languages::Language> {
91 for (syntect_name, lang) in SYNTECT_TO_TREE_SITTER_ALIASES {
92 if *syntect_name == display_name {
93 return Some(*lang);
94 }
95 }
96 fresh_languages::Language::all()
97 .iter()
98 .find(|l| l.display_name() == display_name)
99 .copied()
100}
101
/// The highlighting engines available for one catalog entry. Either, both or
/// neither may be present; the default has neither.
#[derive(Clone, Debug, Default)]
pub struct GrammarEngines {
    /// Index into the registry's `syntax_set.syntaxes()` slice, if a syntect
    /// grammar backs this entry.
    pub syntect: Option<usize>,
    /// Tree-sitter language for this entry, if one is known.
    pub tree_sitter: Option<fresh_languages::Language>,
}
114
/// One language in the registry's catalog, together with everything used to
/// look it up (names, extensions, filenames, globs) and the engines backing it.
#[derive(Clone, Debug)]
pub struct GrammarEntry {
    /// Full display name (the syntect syntax name or the tree-sitter display name).
    pub display_name: String,
    /// Identifier of the language: the tree-sitter id when a tree-sitter
    /// language is known, otherwise the lowercased display name.
    pub language_id: String,
    /// Shortest alias recorded for this grammar, if any.
    pub short_name: Option<String>,
    /// File extensions that resolve to this entry.
    pub extensions: Vec<String>,
    /// Exact filenames that resolve to this entry.
    pub filenames: Vec<String>,
    /// Glob patterns (filename or path patterns) that resolve to this entry.
    pub filename_globs: Vec<String>,
    /// Where the grammar came from.
    pub source: GrammarSource,
    /// Engines available for this entry.
    pub engines: GrammarEngines,
}
142
// Embedded grammar sources.
//
// Each constant holds the full text of a `.sublime-syntax` file, compiled into
// the binary with `include_str!` (paths are relative to this source file).
// `GrammarRegistry::add_embedded_grammars` parses them and adds them to a
// `SyntaxSetBuilder`.

/// Embedded TOML grammar source.
pub const TOML_GRAMMAR: &str = include_str!("../../grammars/toml.sublime-syntax");

/// Embedded Odin grammar source.
pub const ODIN_GRAMMAR: &str = include_str!("../../grammars/odin/Odin.sublime-syntax");

/// Embedded Zig grammar source.
pub const ZIG_GRAMMAR: &str = include_str!("../../grammars/zig.sublime-syntax");

/// Embedded GDScript grammar source.
pub const GDSCRIPT_GRAMMAR: &str = include_str!("../../grammars/gdscript.sublime-syntax");

/// Embedded grammar source for git rebase todo files.
pub const GIT_REBASE_GRAMMAR: &str = include_str!("../../grammars/git-rebase.sublime-syntax");

/// Embedded grammar source for git commit messages.
pub const GIT_COMMIT_GRAMMAR: &str = include_str!("../../grammars/git-commit.sublime-syntax");

/// Embedded gitignore grammar source.
pub const GITIGNORE_GRAMMAR: &str = include_str!("../../grammars/gitignore.sublime-syntax");

/// Embedded git config grammar source.
pub const GITCONFIG_GRAMMAR: &str = include_str!("../../grammars/gitconfig.sublime-syntax");

/// Embedded git attributes grammar source.
pub const GITATTRIBUTES_GRAMMAR: &str = include_str!("../../grammars/gitattributes.sublime-syntax");

/// Embedded Typst grammar source.
pub const TYPST_GRAMMAR: &str = include_str!("../../grammars/typst.sublime-syntax");

// The grammars below are loaded through the name table in
// `add_embedded_grammars`; each constant is named after its language.
pub const DOCKERFILE_GRAMMAR: &str = include_str!("../../grammars/dockerfile.sublime-syntax");
pub const INI_GRAMMAR: &str = include_str!("../../grammars/ini.sublime-syntax");
pub const CMAKE_GRAMMAR: &str = include_str!("../../grammars/cmake.sublime-syntax");
pub const SCSS_GRAMMAR: &str = include_str!("../../grammars/scss.sublime-syntax");
pub const LESS_GRAMMAR: &str = include_str!("../../grammars/less.sublime-syntax");
pub const POWERSHELL_GRAMMAR: &str = include_str!("../../grammars/powershell.sublime-syntax");
pub const KOTLIN_GRAMMAR: &str = include_str!("../../grammars/kotlin.sublime-syntax");
pub const SWIFT_GRAMMAR: &str = include_str!("../../grammars/swift.sublime-syntax");
pub const DART_GRAMMAR: &str = include_str!("../../grammars/dart.sublime-syntax");
pub const ELIXIR_GRAMMAR: &str = include_str!("../../grammars/elixir.sublime-syntax");
pub const FSHARP_GRAMMAR: &str = include_str!("../../grammars/fsharp.sublime-syntax");
pub const NIX_GRAMMAR: &str = include_str!("../../grammars/nix.sublime-syntax");
pub const HCL_GRAMMAR: &str = include_str!("../../grammars/hcl.sublime-syntax");
pub const PROTOBUF_GRAMMAR: &str = include_str!("../../grammars/protobuf.sublime-syntax");
pub const GRAPHQL_GRAMMAR: &str = include_str!("../../grammars/graphql.sublime-syntax");
pub const JULIA_GRAMMAR: &str = include_str!("../../grammars/julia.sublime-syntax");
pub const NIM_GRAMMAR: &str = include_str!("../../grammars/nim.sublime-syntax");
pub const GLEAM_GRAMMAR: &str = include_str!("../../grammars/gleam.sublime-syntax");
pub const VLANG_GRAMMAR: &str = include_str!("../../grammars/vlang.sublime-syntax");
pub const SOLIDITY_GRAMMAR: &str = include_str!("../../grammars/solidity.sublime-syntax");
pub const KDL_GRAMMAR: &str = include_str!("../../grammars/kdl.sublime-syntax");
pub const NUSHELL_GRAMMAR: &str = include_str!("../../grammars/nushell.sublime-syntax");
pub const SMALI_GRAMMAR: &str = include_str!("../../grammars/smali.sublime-syntax");
pub const FISH_GRAMMAR: &str = include_str!("../../grammars/fish.sublime-syntax");
pub const STARLARK_GRAMMAR: &str = include_str!("../../grammars/starlark.sublime-syntax");
pub const JUSTFILE_GRAMMAR: &str = include_str!("../../grammars/justfile.sublime-syntax");
pub const EARTHFILE_GRAMMAR: &str = include_str!("../../grammars/earthfile.sublime-syntax");
pub const GOMOD_GRAMMAR: &str = include_str!("../../grammars/gomod.sublime-syntax");
pub const VUE_GRAMMAR: &str = include_str!("../../grammars/vue.sublime-syntax");
pub const SVELTE_GRAMMAR: &str = include_str!("../../grammars/svelte.sublime-syntax");
pub const ASTRO_GRAMMAR: &str = include_str!("../../grammars/astro.sublime-syntax");
pub const HYPRLANG_GRAMMAR: &str = include_str!("../../grammars/hyprlang.sublime-syntax");
pub const AUTOHOTKEY_GRAMMAR: &str =
    include_str!("../../grammars/autohotkey/AutoHotkey.sublime-syntax");
pub const RACKET_GRAMMAR: &str = include_str!("../../grammars/racket.sublime-syntax");
pub const VERILOG_GRAMMAR: &str = include_str!("../../grammars/verilog.sublime-syntax");
pub const SYSTEMVERILOG_GRAMMAR: &str = include_str!("../../grammars/systemverilog.sublime-syntax");
pub const VHDL_GRAMMAR: &str = include_str!("../../grammars/vhdl.sublime-syntax");

/// Embedded C3 grammar source.
pub const C3_GRAMMAR: &str = include_str!("../../grammars/c3.sublime-syntax");

/// Embedded assembly grammar source.
pub const ASM_GRAMMAR: &str = include_str!("../../grammars/asm.sublime-syntax");
257
258impl std::fmt::Debug for GrammarRegistry {
263 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
264 f.debug_struct("GrammarRegistry")
265 .field("syntax_count", &self.syntax_set.syntaxes().len())
266 .finish()
267 }
268}
269
/// Registry of every known grammar: the syntect syntax set plus a unified
/// catalog of `GrammarEntry` values with name / extension / filename indices.
pub struct GrammarRegistry {
    /// Shared syntect syntax set; `GrammarEngines::syntect` indexes into it.
    syntax_set: Arc<SyntaxSet>,
    /// Extra extension → scope mappings, attached to the catalog entry whose
    /// syntect scope matches.
    user_extensions: HashMap<String, String>,
    /// Exact filename → scope mappings, attached to the catalog entry whose
    /// syntect scope matches.
    filename_scopes: HashMap<String, String>,
    /// Grammar specs handed to the constructor (kept as given).
    loaded_grammar_paths: Vec<GrammarSpec>,
    /// Syntax name → grammar info; used to fill in each entry's `source`.
    grammar_sources: HashMap<String, GrammarInfo>,
    /// Lowercased short alias → exact syntect syntax name.
    aliases: HashMap<String, String>,
    /// The unified catalog, rebuilt by `rebuild_catalog`.
    catalog: Vec<GrammarEntry>,
    /// Lowercased display name, language id or short name → catalog index.
    catalog_by_name: HashMap<String, usize>,
    /// Lowercased extension → catalog index.
    catalog_by_extension: HashMap<String, usize>,
    /// Exact (case-sensitive) filename → catalog index.
    catalog_by_filename: HashMap<String, usize>,
    /// Copy of the config last passed to `apply_language_config`; re-applied
    /// after every catalog rebuild.
    applied_language_config: HashMap<String, crate::config::LanguageConfig>,
    /// Counter incremented (wrapping) each time the catalog is rebuilt or a
    /// language config is applied.
    catalog_gen: u64,
}
306
307impl GrammarRegistry {
308 pub(crate) fn new(
313 syntax_set: SyntaxSet,
314 user_extensions: HashMap<String, String>,
315 filename_scopes: HashMap<String, String>,
316 ) -> Self {
317 Self::new_with_loaded_paths(
318 syntax_set,
319 user_extensions,
320 filename_scopes,
321 Vec::new(),
322 HashMap::new(),
323 )
324 }
325
326 pub(crate) fn new_with_loaded_paths(
331 syntax_set: SyntaxSet,
332 user_extensions: HashMap<String, String>,
333 filename_scopes: HashMap<String, String>,
334 loaded_grammar_paths: Vec<GrammarSpec>,
335 grammar_sources: HashMap<String, GrammarInfo>,
336 ) -> Self {
337 let mut reg = Self {
338 syntax_set: Arc::new(syntax_set),
339 user_extensions,
340 filename_scopes,
341 loaded_grammar_paths,
342 grammar_sources,
343 aliases: HashMap::new(),
344 catalog: Vec::new(),
345 catalog_by_name: HashMap::new(),
346 catalog_by_extension: HashMap::new(),
347 catalog_by_filename: HashMap::new(),
348 applied_language_config: HashMap::new(),
349 catalog_gen: 0,
350 };
351 reg.rebuild_catalog();
352 reg
353 }
354
355 pub fn empty() -> Arc<Self> {
357 let mut builder = SyntaxSetBuilder::new();
358 builder.add_plain_text_syntax();
359 let mut reg = Self {
360 syntax_set: Arc::new(builder.build()),
361 user_extensions: HashMap::new(),
362 filename_scopes: HashMap::new(),
363 loaded_grammar_paths: Vec::new(),
364 grammar_sources: HashMap::new(),
365 aliases: HashMap::new(),
366 catalog: Vec::new(),
367 catalog_by_name: HashMap::new(),
368 catalog_by_extension: HashMap::new(),
369 catalog_by_filename: HashMap::new(),
370 applied_language_config: HashMap::new(),
371 catalog_gen: 0,
372 };
373 reg.rebuild_catalog();
374 Arc::new(reg)
375 }
376
377 pub fn defaults_only() -> Arc<Self> {
384 tracing::info!("defaults_only: loading pre-compiled syntax packdump...");
388 let syntax_set: SyntaxSet = syntect::dumps::from_uncompressed_data(include_bytes!(
389 concat!(env!("OUT_DIR"), "/default_syntaxes.packdump")
390 ))
391 .expect("Failed to load pre-compiled syntax packdump");
392 tracing::info!(
393 "defaults_only: loaded ({} syntaxes)",
394 syntax_set.syntaxes().len()
395 );
396 let grammar_sources = Self::build_grammar_sources_from_syntax_set(&syntax_set);
397 let filename_scopes = Self::build_filename_scopes();
398 let extra_extensions = Self::build_extra_extensions();
399 let mut registry = Self {
400 syntax_set: Arc::new(syntax_set),
401 user_extensions: extra_extensions,
402 filename_scopes,
403 loaded_grammar_paths: Vec::new(),
404 grammar_sources,
405 aliases: HashMap::new(),
406 catalog: Vec::new(),
407 catalog_by_name: HashMap::new(),
408 catalog_by_extension: HashMap::new(),
409 catalog_by_filename: HashMap::new(),
410 applied_language_config: HashMap::new(),
411 catalog_gen: 0,
412 };
413 registry.populate_built_in_aliases();
414 registry.rebuild_catalog();
415 Arc::new(registry)
416 }
417
418 pub(crate) fn build_extra_extensions() -> HashMap<String, String> {
423 let mut map = HashMap::new();
424
425 let js_scope = "source.js".to_string();
427 map.insert("cjs".to_string(), js_scope.clone());
428 map.insert("mjs".to_string(), js_scope);
429
430 map
434 }
435
436 pub(crate) fn build_filename_scopes() -> HashMap<String, String> {
438 let mut map = HashMap::new();
439
440 let shell_scope = "source.shell.bash".to_string();
442 for filename in [
443 ".zshrc",
444 ".zprofile",
445 ".zshenv",
446 ".zlogin",
447 ".zlogout",
448 ".bash_aliases",
449 "PKGBUILD",
452 "APKBUILD",
453 ] {
454 map.insert(filename.to_string(), shell_scope.clone());
455 }
456
457 let git_rebase_scope = "source.git-rebase-todo".to_string();
459 map.insert("git-rebase-todo".to_string(), git_rebase_scope);
460
461 let git_commit_scope = "source.git-commit".to_string();
463 for filename in ["COMMIT_EDITMSG", "MERGE_MSG", "SQUASH_MSG", "TAG_EDITMSG"] {
464 map.insert(filename.to_string(), git_commit_scope.clone());
465 }
466
467 let gitignore_scope = "source.gitignore".to_string();
469 for filename in [".gitignore", ".dockerignore", ".npmignore", ".hgignore"] {
470 map.insert(filename.to_string(), gitignore_scope.clone());
471 }
472
473 let gitconfig_scope = "source.gitconfig".to_string();
475 for filename in [".gitconfig", ".gitmodules"] {
476 map.insert(filename.to_string(), gitconfig_scope.clone());
477 }
478
479 let gitattributes_scope = "source.gitattributes".to_string();
481 map.insert(".gitattributes".to_string(), gitattributes_scope);
482
483 let groovy_scope = "source.groovy".to_string();
485 map.insert("Jenkinsfile".to_string(), groovy_scope);
486
487 let ruby_scope = "source.ruby".to_string();
490 map.insert("Brewfile".to_string(), ruby_scope);
491
492 let dockerfile_scope = "source.dockerfile".to_string();
494 map.insert("Dockerfile".to_string(), dockerfile_scope.clone());
495 map.insert("Containerfile".to_string(), dockerfile_scope.clone());
496 map.insert("Dockerfile.dev".to_string(), dockerfile_scope.clone());
498 map.insert("Dockerfile.prod".to_string(), dockerfile_scope.clone());
499 map.insert("Dockerfile.test".to_string(), dockerfile_scope.clone());
500 map.insert("Dockerfile.build".to_string(), dockerfile_scope.clone());
501
502 let cmake_scope = "source.cmake".to_string();
504 map.insert("CMakeLists.txt".to_string(), cmake_scope);
505
506 let starlark_scope = "source.starlark".to_string();
508 map.insert("BUILD".to_string(), starlark_scope.clone());
509 map.insert("BUILD.bazel".to_string(), starlark_scope.clone());
510 map.insert("WORKSPACE".to_string(), starlark_scope.clone());
511 map.insert("WORKSPACE.bazel".to_string(), starlark_scope.clone());
512 map.insert("Tiltfile".to_string(), starlark_scope);
513
514 let justfile_scope = "source.justfile".to_string();
516 map.insert("justfile".to_string(), justfile_scope.clone());
517 map.insert("Justfile".to_string(), justfile_scope.clone());
518 map.insert(".justfile".to_string(), justfile_scope);
519
520 let ini_scope = "source.ini".to_string();
522 map.insert(".editorconfig".to_string(), ini_scope);
523
524 let earthfile_scope = "source.earthfile".to_string();
526 map.insert("Earthfile".to_string(), earthfile_scope);
527
528 let hyprlang_scope = "source.hyprlang".to_string();
530 map.insert("hyprland.conf".to_string(), hyprlang_scope.clone());
531 map.insert("hyprpaper.conf".to_string(), hyprlang_scope.clone());
532 map.insert("hyprlock.conf".to_string(), hyprlang_scope);
533
534 let gomod_scope = "source.gomod".to_string();
536 map.insert("go.mod".to_string(), gomod_scope.clone());
537 map.insert("go.sum".to_string(), gomod_scope);
538
539 let yaml_scope = "source.yaml".to_string();
546 for filename in [
547 "yarn.lock",
548 ".clang-format",
549 "_clang-format",
550 ".clang-tidy",
551 ".yamllint",
552 "Podfile.lock",
553 "pubspec.lock",
554 ] {
555 map.insert(filename.to_string(), yaml_scope.clone());
556 }
557
558 let toml_scope = "source.toml".to_string();
562 for filename in ["Cargo.lock", "poetry.lock", "uv.lock"] {
563 map.insert(filename.to_string(), toml_scope.clone());
564 }
565
566 let json_scope = "source.json".to_string();
569 for filename in ["composer.lock", "Pipfile.lock", "flake.lock", "deno.lock"] {
570 map.insert(filename.to_string(), json_scope.clone());
571 }
572
573 map
574 }
575
576 pub(crate) fn add_embedded_grammars(builder: &mut SyntaxSetBuilder) {
578 match SyntaxDefinition::load_from_str(TOML_GRAMMAR, true, Some("TOML")) {
580 Ok(syntax) => {
581 builder.add(syntax);
582 tracing::debug!("Loaded embedded TOML grammar");
583 }
584 Err(e) => {
585 tracing::warn!("Failed to load embedded TOML grammar: {}", e);
586 }
587 }
588
589 match SyntaxDefinition::load_from_str(ODIN_GRAMMAR, true, Some("Odin")) {
591 Ok(syntax) => {
592 builder.add(syntax);
593 tracing::debug!("Loaded embedded Odin grammar");
594 }
595 Err(e) => {
596 tracing::warn!("Failed to load embedded Odin grammar: {}", e);
597 }
598 }
599
600 match SyntaxDefinition::load_from_str(ZIG_GRAMMAR, true, Some("Zig")) {
602 Ok(syntax) => {
603 builder.add(syntax);
604 tracing::debug!("Loaded embedded Zig grammar");
605 }
606 Err(e) => {
607 tracing::warn!("Failed to load embedded Zig grammar: {}", e);
608 }
609 }
610
611 match SyntaxDefinition::load_from_str(GDSCRIPT_GRAMMAR, true, Some("GDScript")) {
613 Ok(syntax) => {
614 builder.add(syntax);
615 tracing::debug!("Loaded embedded GDScript grammar");
616 }
617 Err(e) => {
618 tracing::warn!("Failed to load embedded GDScript grammar: {}", e);
619 }
620 }
621
622 match SyntaxDefinition::load_from_str(GIT_REBASE_GRAMMAR, true, Some("Git Rebase Todo")) {
624 Ok(syntax) => {
625 builder.add(syntax);
626 tracing::debug!("Loaded embedded Git Rebase Todo grammar");
627 }
628 Err(e) => {
629 tracing::warn!("Failed to load embedded Git Rebase Todo grammar: {}", e);
630 }
631 }
632
633 match SyntaxDefinition::load_from_str(GIT_COMMIT_GRAMMAR, true, Some("Git Commit Message"))
635 {
636 Ok(syntax) => {
637 builder.add(syntax);
638 tracing::debug!("Loaded embedded Git Commit Message grammar");
639 }
640 Err(e) => {
641 tracing::warn!("Failed to load embedded Git Commit Message grammar: {}", e);
642 }
643 }
644
645 match SyntaxDefinition::load_from_str(GITIGNORE_GRAMMAR, true, Some("Gitignore")) {
647 Ok(syntax) => {
648 builder.add(syntax);
649 tracing::debug!("Loaded embedded Gitignore grammar");
650 }
651 Err(e) => {
652 tracing::warn!("Failed to load embedded Gitignore grammar: {}", e);
653 }
654 }
655
656 match SyntaxDefinition::load_from_str(GITCONFIG_GRAMMAR, true, Some("Git Config")) {
658 Ok(syntax) => {
659 builder.add(syntax);
660 tracing::debug!("Loaded embedded Git Config grammar");
661 }
662 Err(e) => {
663 tracing::warn!("Failed to load embedded Git Config grammar: {}", e);
664 }
665 }
666
667 match SyntaxDefinition::load_from_str(GITATTRIBUTES_GRAMMAR, true, Some("Git Attributes")) {
669 Ok(syntax) => {
670 builder.add(syntax);
671 tracing::debug!("Loaded embedded Git Attributes grammar");
672 }
673 Err(e) => {
674 tracing::warn!("Failed to load embedded Git Attributes grammar: {}", e);
675 }
676 }
677
678 match SyntaxDefinition::load_from_str(TYPST_GRAMMAR, true, Some("Typst")) {
680 Ok(syntax) => {
681 builder.add(syntax);
682 tracing::debug!("Loaded embedded Typst grammar");
683 }
684 Err(e) => {
685 tracing::warn!("Failed to load embedded Typst grammar: {}", e);
686 }
687 }
688
689 let additional_grammars: &[(&str, &str)] = &[
691 (DOCKERFILE_GRAMMAR, "Dockerfile"),
692 (INI_GRAMMAR, "INI"),
693 (CMAKE_GRAMMAR, "CMake"),
694 (SCSS_GRAMMAR, "SCSS"),
695 (LESS_GRAMMAR, "LESS"),
696 (POWERSHELL_GRAMMAR, "PowerShell"),
697 (KOTLIN_GRAMMAR, "Kotlin"),
698 (SWIFT_GRAMMAR, "Swift"),
699 (DART_GRAMMAR, "Dart"),
700 (ELIXIR_GRAMMAR, "Elixir"),
701 (FSHARP_GRAMMAR, "FSharp"),
702 (NIX_GRAMMAR, "Nix"),
703 (HCL_GRAMMAR, "HCL"),
704 (PROTOBUF_GRAMMAR, "Protocol Buffers"),
705 (GRAPHQL_GRAMMAR, "GraphQL"),
706 (JULIA_GRAMMAR, "Julia"),
707 (NIM_GRAMMAR, "Nim"),
708 (GLEAM_GRAMMAR, "Gleam"),
709 (VLANG_GRAMMAR, "V"),
710 (SOLIDITY_GRAMMAR, "Solidity"),
711 (KDL_GRAMMAR, "KDL"),
712 (NUSHELL_GRAMMAR, "Nushell"),
713 (SMALI_GRAMMAR, "Smali"),
714 (FISH_GRAMMAR, "Fish"),
715 (STARLARK_GRAMMAR, "Starlark"),
716 (JUSTFILE_GRAMMAR, "Justfile"),
717 (EARTHFILE_GRAMMAR, "Earthfile"),
718 (GOMOD_GRAMMAR, "Go Module"),
719 (VUE_GRAMMAR, "Vue"),
720 (SVELTE_GRAMMAR, "Svelte"),
721 (ASTRO_GRAMMAR, "Astro"),
722 (HYPRLANG_GRAMMAR, "Hyprlang"),
723 (AUTOHOTKEY_GRAMMAR, "AutoHotkey"),
724 (RACKET_GRAMMAR, "Racket"),
725 (VERILOG_GRAMMAR, "Verilog"),
726 (SYSTEMVERILOG_GRAMMAR, "SystemVerilog"),
727 (VHDL_GRAMMAR, "VHDL"),
728 (C3_GRAMMAR, "C3"),
729 (ASM_GRAMMAR, "Assembly"),
730 ];
731
732 for (grammar_str, name) in additional_grammars {
733 match SyntaxDefinition::load_from_str(grammar_str, true, Some(name)) {
734 Ok(syntax) => {
735 builder.add(syntax);
736 tracing::debug!("Loaded embedded {} grammar", name);
737 }
738 Err(e) => {
739 tracing::warn!("Failed to load embedded {} grammar: {}", name, e);
740 }
741 }
742 }
743 }
744
745 pub fn find_syntax_for_file(&self, path: &Path) -> Option<&SyntaxReference> {
751 let entry = self.find_by_path(path, None)?;
752 entry
753 .engines
754 .syntect
755 .map(|i| &self.syntax_set.syntaxes()[i])
756 }
757
758 pub fn find_syntax_by_name(&self, name: &str) -> Option<&SyntaxReference> {
766 if let Some(entry) = self.find_by_name(name) {
767 if let Some(idx) = entry.engines.syntect {
768 return Some(&self.syntax_set.syntaxes()[idx]);
769 }
770 }
771 self.syntax_set.find_syntax_by_name(name)
775 }
776
777 fn built_in_aliases() -> Vec<(&'static str, &'static str)> {
786 vec![
787 ("bash", "Bourne Again Shell (bash)"),
789 ("shell", "Bourne Again Shell (bash)"),
790 ("sh", "Bourne Again Shell (bash)"),
791 ("c++", "C++"),
792 ("cpp", "C++"),
793 ("csharp", "C#"),
794 ("objc", "Objective-C"),
795 ("objcpp", "Objective-C++"),
796 ("regex", "Regular Expressions (Python)"),
797 ("regexp", "Regular Expressions (Python)"),
798 ("proto", "Protocol Buffers"),
800 ("protobuf", "Protocol Buffers"),
801 ("gomod", "Go Module"),
802 ("git-rebase", "Git Rebase Todo"),
803 ("git-commit", "Git Commit Message"),
804 ("git-config", "Git Config"),
805 ("git-attributes", "Git Attributes"),
806 ("gitignore", "Gitignore"),
807 ("fsharp", "FSharp"),
808 ("f#", "FSharp"),
809 ("terraform", "HCL"),
810 ("tf", "HCL"),
811 ("ts", "TypeScript"),
812 ("js", "JavaScript"),
813 ("py", "Python"),
814 ("rb", "Ruby"),
815 ("rs", "Rust"),
816 ("md", "Markdown"),
817 ("yml", "YAML"),
818 ("dockerfile", "Dockerfile"),
819 ]
820 }
821
822 pub(crate) fn populate_built_in_aliases(&mut self) {
829 for (short, full) in Self::built_in_aliases() {
830 self.register_alias_inner(short, full, true);
831 }
832 self.rebuild_catalog();
833 }
834
835 pub(crate) fn register_alias(&mut self, short_name: &str, full_name: &str) -> bool {
845 if !self.register_alias_inner(short_name, full_name, false) {
846 return false;
847 }
848 let short_lower = short_name.to_lowercase();
849 let full_lower = full_name.to_lowercase();
850 if let Some(&idx) = self.catalog_by_name.get(&full_lower) {
851 self.catalog_by_name
852 .entry(short_lower.clone())
853 .or_insert(idx);
854 let entry = &mut self.catalog[idx];
855 let replace = match &entry.short_name {
856 None => true,
857 Some(existing) => short_name.len() < existing.len(),
858 };
859 if replace {
860 entry.short_name = Some(short_lower);
861 }
862 }
863 true
864 }
865
866 fn register_alias_inner(
867 &mut self,
868 short_name: &str,
869 full_name: &str,
870 is_built_in: bool,
871 ) -> bool {
872 let short_lower = short_name.to_lowercase();
873
874 let target_exists = self
876 .syntax_set
877 .syntaxes()
878 .iter()
879 .any(|s| s.name.eq_ignore_ascii_case(full_name));
880 if !target_exists {
881 if tree_sitter_for_syntect_name(full_name).is_some() {
885 return false;
886 }
887 if is_built_in {
888 tracing::warn!(
891 "[grammar-alias] Built-in alias '{}' -> '{}': target grammar not found, skipping",
892 short_name, full_name
893 );
894 } else {
895 tracing::warn!(
896 "[grammar-alias] Alias '{}' -> '{}': target grammar not found, skipping",
897 short_name,
898 full_name
899 );
900 }
901 return false;
902 }
903
904 let collides_with_full_name = self
906 .syntax_set
907 .syntaxes()
908 .iter()
909 .any(|s| s.name.eq_ignore_ascii_case(&short_lower));
910 if collides_with_full_name {
911 tracing::debug!(
915 "[grammar-alias] Alias '{}' matches an existing grammar name, skipping (not needed)",
916 short_name
917 );
918 return false;
919 }
920
921 if let Some(existing_target) = self.aliases.get(&short_lower) {
923 if existing_target.eq_ignore_ascii_case(full_name) {
924 return true;
926 }
927 let msg = format!(
928 "Alias '{}' already maps to '{}', cannot remap to '{}'",
929 short_name, existing_target, full_name
930 );
931 if is_built_in {
932 panic!("[grammar-alias] Built-in alias collision: {}", msg);
933 } else {
934 tracing::warn!("[grammar-alias] {}", msg);
935 return false;
936 }
937 }
938
939 let exact_name = self
941 .syntax_set
942 .syntaxes()
943 .iter()
944 .find(|s| s.name.eq_ignore_ascii_case(full_name))
945 .map(|s| s.name.clone())
946 .unwrap();
947
948 self.aliases.insert(short_lower, exact_name);
949 true
950 }
951
    /// Rebuilds `catalog` and its three lookup indices from scratch.
    ///
    /// Inputs are the syntect syntax set, `grammar_sources`,
    /// `filename_scopes`, `user_extensions`, the built-in and registered
    /// aliases, and every language reported by
    /// `fresh_languages::Language::all()`. Afterwards any stored
    /// `applied_language_config` is re-applied on top of the fresh catalog and
    /// `catalog_gen` is incremented (wrapping).
    pub(crate) fn rebuild_catalog(&mut self) {
        // Lowercased full grammar name -> shortest alias known for it.
        let mut short_by_full: HashMap<String, String> = HashMap::new();
        // Stores `short` for `full` when nothing is recorded yet or `short` is
        // strictly shorter than the recorded alias (ties keep the earlier one).
        let record = |map: &mut HashMap<String, String>, short: &str, full: &str| {
            let key = full.to_lowercase();
            let keep = match map.get(&key) {
                None => true,
                Some(existing) => short.len() < existing.len(),
            };
            if keep {
                map.insert(key, short.to_string());
            }
        };
        // Built-in aliases are considered even if they were never registered
        // in `self.aliases`.
        for (short, full) in Self::built_in_aliases() {
            record(&mut short_by_full, short, full);
        }
        for (short, full) in &self.aliases {
            record(&mut short_by_full, short, full);
        }

        // Language id = tree-sitter id when tree-sitter knows the display
        // name, otherwise the lowercased display name.
        let derive_language_id =
            |display_name: &str| -> (String, Option<fresh_languages::Language>) {
                let ts = tree_sitter_for_syntect_name(display_name);
                let id = ts
                    .map(|l| l.id().to_string())
                    .unwrap_or_else(|| display_name.to_lowercase());
                (id, ts)
            };

        let mut catalog: Vec<GrammarEntry> = Vec::new();
        // Syntect scope string -> catalog index, used below to attach
        // filename and extension mappings that are keyed by scope.
        let mut scope_to_index: HashMap<String, usize> = HashMap::new();

        // Pass 1: one entry per syntect syntax.
        for (idx, syntax) in self.syntax_set.syntaxes().iter().enumerate() {
            // These two syntect grammars get no syntect-backed entry.
            // NOTE(review): the reason for skipping "JavaScript" is not
            // visible in this function — confirm it is intentional, since
            // `build_extra_extensions` maps `cjs`/`mjs` to `source.js`, which
            // is presumably that grammar's scope and would then never resolve
            // through `scope_to_index`.
            if syntax.name == "Plain Text" || syntax.name == "JavaScript" {
                continue;
            }
            let (language_id, tree_sitter) = derive_language_id(&syntax.name);
            let short_name = short_by_full.get(&syntax.name.to_lowercase()).cloned();
            // Grammars without recorded source info are treated as built-in.
            let source = self
                .grammar_sources
                .get(&syntax.name)
                .map(|info| info.source.clone())
                .unwrap_or(GrammarSource::BuiltIn);
            let entry_index = catalog.len();
            scope_to_index.insert(syntax.scope.to_string(), entry_index);

            // Union of the syntect extensions and the tree-sitter extensions,
            // syntect first, without duplicates.
            let mut extensions = syntax.file_extensions.clone();
            if let Some(lang) = tree_sitter {
                for ext in lang.extensions() {
                    let ext = ext.to_string();
                    if !extensions.iter().any(|e| e == &ext) {
                        extensions.push(ext);
                    }
                }
            }

            // Only the grammar named "Fish" may claim the `fish` extension.
            if syntax.name != "Fish" {
                extensions.retain(|e| e != "fish");
            }

            catalog.push(GrammarEntry {
                display_name: syntax.name.clone(),
                language_id,
                short_name,
                extensions,
                filenames: Vec::new(),
                filename_globs: Vec::new(),
                source,
                engines: GrammarEngines {
                    syntect: Some(idx),
                    tree_sitter,
                },
            });
        }

        // Attach exact filenames to the entry that owns the mapped scope;
        // mappings whose scope has no entry are ignored.
        for (filename, scope) in &self.filename_scopes {
            if let Some(&idx) = scope_to_index.get(scope) {
                if !catalog[idx].filenames.iter().any(|f| f == filename) {
                    catalog[idx].filenames.push(filename.clone());
                }
            }
        }

        // Same for the extra extension -> scope mappings.
        for (ext, scope) in &self.user_extensions {
            if let Some(&idx) = scope_to_index.get(scope) {
                if !catalog[idx].extensions.iter().any(|e| e == ext) {
                    catalog[idx].extensions.push(ext.clone());
                }
            }
        }

        // Pass 2: add a tree-sitter-only entry for every language that no
        // syntect entry already covers.
        let mut ts_covered: std::collections::HashSet<fresh_languages::Language> =
            std::collections::HashSet::new();
        for entry in &catalog {
            if let Some(lang) = entry.engines.tree_sitter {
                ts_covered.insert(lang);
            }
        }
        for lang in fresh_languages::Language::all() {
            if ts_covered.contains(lang) {
                continue;
            }
            let display_name = lang.display_name().to_string();
            let language_id = lang.id().to_string();
            let short_name = short_by_full.get(&display_name.to_lowercase()).cloned();
            let extensions: Vec<String> = lang.extensions().iter().map(|s| s.to_string()).collect();
            catalog.push(GrammarEntry {
                display_name,
                language_id,
                short_name,
                extensions,
                filenames: Vec::new(),
                filename_globs: Vec::new(),
                source: GrammarSource::BuiltIn,
                engines: GrammarEngines {
                    syntect: None,
                    tree_sitter: Some(*lang),
                },
            });
        }

        // Build the lookup indices. Name keys are lowercased and a later
        // entry overwrites an earlier one; extension and filename keys keep
        // the first entry that claims them.
        let mut by_name: HashMap<String, usize> = HashMap::new();
        let mut by_extension: HashMap<String, usize> = HashMap::new();
        let mut by_filename: HashMap<String, usize> = HashMap::new();
        for (idx, entry) in catalog.iter().enumerate() {
            by_name.insert(entry.display_name.to_lowercase(), idx);
            by_name.insert(entry.language_id.to_lowercase(), idx);
            if let Some(short) = &entry.short_name {
                by_name.insert(short.to_lowercase(), idx);
            }
            for ext in &entry.extensions {
                by_extension.entry(ext.to_lowercase()).or_insert(idx);
                // The raw extension string is also registered as an exact
                // filename (presumably because syntect `file_extensions` can
                // hold whole filenames — confirm).
                by_filename.entry(ext.clone()).or_insert(idx);
            }
            for filename in &entry.filenames {
                by_filename.entry(filename.clone()).or_insert(idx);
            }
        }

        self.catalog = catalog;
        self.catalog_by_name = by_name;
        self.catalog_by_extension = by_extension;
        self.catalog_by_filename = by_filename;

        // Re-apply the stored language config to the fresh catalog. The map
        // is moved out temporarily so it can be borrowed while `self` is
        // mutated, then put back.
        if !self.applied_language_config.is_empty() {
            let cfg = std::mem::take(&mut self.applied_language_config);
            self.apply_language_config_inner(&cfg);
            self.applied_language_config = cfg;
        }
        self.catalog_gen = self.catalog_gen.wrapping_add(1);
    }
1167
1168 pub fn catalog(&self) -> &[GrammarEntry] {
1170 &self.catalog
1171 }
1172
1173 pub fn catalog_gen(&self) -> u64 {
1177 self.catalog_gen
1178 }
1179
1180 pub fn find_by_name(&self, name: &str) -> Option<&GrammarEntry> {
1186 self.catalog_by_name
1187 .get(&name.to_lowercase())
1188 .map(|&idx| &self.catalog[idx])
1189 }
1190
1191 pub fn find_by_path(&self, path: &Path, first_line: Option<&str>) -> Option<&GrammarEntry> {
1212 let filename = path.file_name().and_then(|n| n.to_str());
1213 let path_str = path.to_str().unwrap_or("");
1214
1215 if let Some(name) = filename {
1216 if let Some(&idx) = self.catalog_by_filename.get(name) {
1217 return Some(&self.catalog[idx]);
1218 }
1219 }
1220
1221 if let Some(name) = filename {
1223 for entry in &self.catalog {
1224 for pattern in &entry.filename_globs {
1225 let matched = if is_path_pattern(pattern) {
1226 path_glob_matches(pattern, path_str)
1227 } else {
1228 filename_glob_matches(pattern, name)
1229 };
1230 if matched {
1231 return Some(entry);
1232 }
1233 }
1234 }
1235 }
1236
1237 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
1238 if let Some(entry) = self.find_by_extension(ext) {
1239 return Some(entry);
1240 }
1241 }
1242
1243 let line = first_line?;
1248 if let Some(syntax) = self.syntax_set.find_syntax_by_first_line(line) {
1249 if let Some(entry) = self.find_by_name(&syntax.name) {
1250 return Some(entry);
1251 }
1252 }
1253
1254 let lang = super::shebang::language_for_shebang(line)?;
1258 self.find_by_name(lang)
1259 }
1260
1261 pub fn find_by_extension(&self, ext: &str) -> Option<&GrammarEntry> {
1263 self.catalog_by_extension
1264 .get(&ext.to_lowercase())
1265 .map(|&idx| &self.catalog[idx])
1266 }
1267
1268 pub fn apply_language_config(
1281 &mut self,
1282 languages: &HashMap<String, crate::config::LanguageConfig>,
1283 ) {
1284 self.applied_language_config = languages.clone();
1285 self.apply_language_config_inner(languages);
1286 self.catalog_gen = self.catalog_gen.wrapping_add(1);
1287 }
1288
1289 fn apply_language_config_inner(
1294 &mut self,
1295 languages: &HashMap<String, crate::config::LanguageConfig>,
1296 ) {
1297 for (lang_id, lang_cfg) in languages {
1298 let grammar_name = if lang_cfg.grammar.is_empty() {
1299 lang_id.as_str()
1300 } else {
1301 lang_cfg.grammar.as_str()
1302 };
1303
1304 let idx = self
1306 .catalog_by_name
1307 .get(&grammar_name.to_lowercase())
1308 .copied()
1309 .or_else(|| self.catalog_by_name.get(&lang_id.to_lowercase()).copied())
1310 .unwrap_or_else(|| {
1311 let idx = self.catalog.len();
1312 self.catalog.push(GrammarEntry {
1313 display_name: lang_id.clone(),
1314 language_id: lang_id.clone(),
1315 short_name: None,
1316 extensions: Vec::new(),
1317 filenames: Vec::new(),
1318 filename_globs: Vec::new(),
1319 source: GrammarSource::BuiltIn,
1320 engines: GrammarEngines::default(),
1321 });
1322 idx
1323 });
1324
1325 self.catalog_by_name
1330 .entry(lang_id.to_lowercase())
1331 .or_insert(idx);
1332
1333 for ext in &lang_cfg.extensions {
1334 if !self.catalog[idx].extensions.iter().any(|e| e == ext) {
1335 self.catalog[idx].extensions.push(ext.clone());
1336 }
1337 self.catalog_by_extension.insert(ext.to_lowercase(), idx);
1339 }
1340 for filename in &lang_cfg.filenames {
1341 if is_glob_pattern(filename) {
1342 if !self.catalog[idx]
1343 .filename_globs
1344 .iter()
1345 .any(|f| f == filename)
1346 {
1347 self.catalog[idx].filename_globs.push(filename.clone());
1348 }
1349 } else {
1350 if !self.catalog[idx].filenames.iter().any(|f| f == filename) {
1351 self.catalog[idx].filenames.push(filename.clone());
1352 }
1353 self.catalog_by_filename.insert(filename.clone(), idx);
1354 }
1355 }
1356 }
1357 }
1358
1359 pub fn syntax_set(&self) -> &Arc<SyntaxSet> {
1361 &self.syntax_set
1362 }
1363
1364 pub fn syntax_set_arc(&self) -> Arc<SyntaxSet> {
1366 Arc::clone(&self.syntax_set)
1367 }
1368
1369 pub fn available_syntaxes(&self) -> Vec<&str> {
1371 self.syntax_set
1372 .syntaxes()
1373 .iter()
1374 .map(|s| s.name.as_str())
1375 .collect()
1376 }
1377
1378 pub fn available_grammar_info(&self) -> Vec<GrammarInfo> {
1385 let mut result: Vec<GrammarInfo> = self
1386 .catalog
1387 .iter()
1388 .map(|entry| GrammarInfo {
1389 name: entry.display_name.clone(),
1390 source: entry.source.clone(),
1391 file_extensions: entry.extensions.clone(),
1392 short_name: entry.short_name.clone(),
1393 })
1394 .collect();
1395 result.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
1396 result
1397 }
1398
1399 pub(crate) fn grammar_sources(&self) -> &HashMap<String, GrammarInfo> {
1401 &self.grammar_sources
1402 }
1403
1404 pub(crate) fn build_grammar_sources_from_syntax_set(
1408 syntax_set: &SyntaxSet,
1409 ) -> HashMap<String, GrammarInfo> {
1410 let mut sources = HashMap::new();
1411 for syntax in syntax_set.syntaxes() {
1412 sources.insert(
1413 syntax.name.clone(),
1414 GrammarInfo {
1415 name: syntax.name.clone(),
1416 source: GrammarSource::BuiltIn,
1417 file_extensions: syntax.file_extensions.clone(),
1418 short_name: None,
1419 },
1420 );
1421 }
1422 sources
1423 }
1424
1425 #[cfg(test)]
1427 pub(crate) fn user_extensions(&self) -> &HashMap<String, String> {
1428 &self.user_extensions
1429 }
1430
1431 #[cfg(test)]
1433 pub(crate) fn loaded_grammar_paths(&self) -> &[GrammarSpec] {
1434 &self.loaded_grammar_paths
1435 }
1436
1437 pub fn with_additional_grammars(
1451 base: &GrammarRegistry,
1452 additional: &[GrammarSpec],
1453 ) -> Option<Self> {
1454 tracing::info!(
1455 "[SYNTAX DEBUG] with_additional_grammars: adding {} grammars to base with {} syntaxes",
1456 additional.len(),
1457 base.syntax_set.syntaxes().len()
1458 );
1459
1460 let mut builder = (*base.syntax_set).clone().into_builder();
1464
1465 let mut user_extensions = base.user_extensions.clone();
1467
1468 let mut loaded_grammar_paths = base.loaded_grammar_paths.clone();
1470
1471 let mut grammar_sources = base.grammar_sources.clone();
1473
1474 for spec in additional {
1476 tracing::info!(
1477 "[SYNTAX DEBUG] loading new grammar file: lang='{}', path={:?}, extensions={:?}",
1478 spec.language,
1479 spec.path,
1480 spec.extensions
1481 );
1482 match Self::load_grammar_file(&spec.path) {
1483 Ok(syntax) => {
1484 let scope = syntax.scope.to_string();
1485 let syntax_name = syntax.name.clone();
1486 tracing::info!(
1487 "[SYNTAX DEBUG] grammar loaded successfully: name='{}', scope='{}'",
1488 syntax_name,
1489 scope
1490 );
1491 builder.add(syntax);
1492 tracing::info!(
1493 "Loaded grammar for '{}' from {:?} with extensions {:?}",
1494 spec.language,
1495 spec.path,
1496 spec.extensions
1497 );
1498 for ext in &spec.extensions {
1500 user_extensions.insert(ext.clone(), scope.clone());
1501 }
1502 grammar_sources.insert(
1504 syntax_name.clone(),
1505 GrammarInfo {
1506 name: syntax_name,
1507 source: GrammarSource::Plugin {
1508 plugin: spec.language.clone(),
1509 path: spec.path.clone(),
1510 },
1511 file_extensions: spec.extensions.clone(),
1512 short_name: None,
1513 },
1514 );
1515 loaded_grammar_paths.push(spec.clone());
1517 }
1518 Err(e) => {
1519 tracing::warn!(
1520 "Failed to load grammar for '{}' from {:?}: {}",
1521 spec.language,
1522 spec.path,
1523 e
1524 );
1525 }
1526 }
1527 }
1528
1529 let mut reg = Self {
1530 syntax_set: Arc::new(builder.build()),
1531 user_extensions,
1532 filename_scopes: base.filename_scopes.clone(),
1533 loaded_grammar_paths,
1534 grammar_sources,
1535 aliases: base.aliases.clone(),
1536 catalog: Vec::new(),
1537 catalog_by_name: HashMap::new(),
1538 catalog_by_extension: HashMap::new(),
1539 catalog_by_filename: HashMap::new(),
1540 applied_language_config: HashMap::new(),
1541 catalog_gen: 0,
1542 };
1543 reg.rebuild_catalog();
1544 Some(reg)
1545 }
1546
1547 pub(crate) fn load_grammar_file(path: &Path) -> Result<SyntaxDefinition, String> {
1553 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
1554
1555 match ext {
1556 "sublime-syntax" => {
1557 let content = std::fs::read_to_string(path)
1558 .map_err(|e| format!("Failed to read file: {}", e))?;
1559 SyntaxDefinition::load_from_str(
1560 &content,
1561 true,
1562 path.file_stem().and_then(|s| s.to_str()),
1563 )
1564 .map_err(|e| format!("Failed to parse sublime-syntax: {}", e))
1565 }
1566 _ => Err(format!(
1567 "Unsupported grammar format: .{}. Only .sublime-syntax is supported.",
1568 ext
1569 )),
1570 }
1571 }
1572}
1573
1574impl Default for GrammarRegistry {
1575 fn default() -> Self {
1576 let defaults = SyntaxSet::load_defaults_newlines();
1578 let mut builder = defaults.into_builder();
1579 Self::add_embedded_grammars(&mut builder);
1580 let syntax_set = builder.build();
1581 let filename_scopes = Self::build_filename_scopes();
1582 let extra_extensions = Self::build_extra_extensions();
1583
1584 let mut registry = Self::new(syntax_set, extra_extensions, filename_scopes);
1585 registry.populate_built_in_aliases();
1586 registry.rebuild_catalog();
1587 registry
1588 }
1589}
1590
1591#[derive(Debug, Deserialize)]
1594pub struct PackageManifest {
1595 #[serde(default)]
1596 pub contributes: Option<Contributes>,
1597}
1598
1599#[derive(Debug, Deserialize, Default)]
1600pub struct Contributes {
1601 #[serde(default)]
1602 pub languages: Vec<LanguageContribution>,
1603 #[serde(default)]
1604 pub grammars: Vec<GrammarContribution>,
1605}
1606
1607#[derive(Debug, Deserialize)]
1608pub struct LanguageContribution {
1609 pub id: String,
1610 #[serde(default)]
1611 pub extensions: Vec<String>,
1612}
1613
1614#[derive(Debug, Deserialize)]
1615pub struct GrammarContribution {
1616 pub language: String,
1617 #[serde(rename = "scopeName")]
1618 pub scope_name: String,
1619 pub path: String,
1620}
1621
1622#[cfg(test)]
1623mod tests {
1624 use super::*;
1625
1626 #[test]
1627 fn test_empty_registry() {
1628 let registry = GrammarRegistry::empty();
1629 assert!(!registry.available_syntaxes().is_empty());
1631 }
1632
1633 #[test]
1634 fn test_default_registry() {
1635 let registry = GrammarRegistry::default();
1636 assert!(!registry.available_syntaxes().is_empty());
1638 }
1639
1640 #[test]
1641 fn test_find_syntax_for_common_extensions() {
1642 let registry = GrammarRegistry::default();
1643
1644 let test_cases = [
1651 ("test.py", true),
1652 ("test.rs", true),
1653 ("test.js", false),
1654 ("test.json", true),
1655 ("test.md", true),
1656 ("test.html", true),
1657 ("test.css", true),
1658 ("test.gd", true),
1659 ("test.unknown_extension_xyz", false),
1660 ];
1661
1662 for (filename, should_exist) in test_cases {
1663 let path = Path::new(filename);
1664 let result = registry.find_syntax_for_file(path);
1665 assert_eq!(
1666 result.is_some(),
1667 should_exist,
1668 "Expected {:?} for {}",
1669 should_exist,
1670 filename
1671 );
1672 }
1673 }
1674
1675 #[test]
1676 fn test_racket_grammar_loaded() {
1677 let registry = GrammarRegistry::default();
1678 for filename in ["main.rkt", "data.rktd", "info.rktl", "doc.scrbl"] {
1679 let result = registry.find_syntax_for_file(Path::new(filename));
1680 assert!(
1681 result.is_some(),
1682 "Racket grammar should be available for {}",
1683 filename
1684 );
1685 let entry = registry.find_by_path(Path::new(filename), None).unwrap();
1686 assert_eq!(entry.display_name, "Racket", "for {}", filename);
1687 }
1688 }
1689
1690 #[test]
1691 fn test_syntax_set_arc() {
1692 let registry = GrammarRegistry::default();
1693 let arc1 = registry.syntax_set_arc();
1694 let arc2 = registry.syntax_set_arc();
1695 assert!(Arc::ptr_eq(&arc1, &arc2));
1697 }
1698
1699 #[test]
1700 fn test_shell_dotfiles_detection() {
1701 let registry = GrammarRegistry::default();
1702
1703 let shell_files = [".zshrc", ".zprofile", ".zshenv", ".bash_aliases"];
1705
1706 for filename in shell_files {
1707 let path = Path::new(filename);
1708 let result = registry.find_syntax_for_file(path);
1709 assert!(
1710 result.is_some(),
1711 "{} should be detected as a syntax",
1712 filename
1713 );
1714 let syntax = result.unwrap();
1715 assert!(
1717 syntax.name.to_lowercase().contains("bash")
1718 || syntax.name.to_lowercase().contains("shell"),
1719 "{} should be detected as shell/bash, got: {}",
1720 filename,
1721 syntax.name
1722 );
1723 }
1724 }
1725
1726 #[test]
1727 fn test_shebang_interpreter_targets_resolve() {
1728 let registry = GrammarRegistry::default();
1731 for name in [
1732 "/bin/sh",
1733 "/usr/bin/fish",
1734 "/usr/bin/env python3",
1735 "/usr/bin/env ruby",
1736 "/usr/bin/perl",
1737 "/usr/bin/env php",
1738 "/usr/bin/env node",
1739 "/usr/bin/env deno",
1740 "/usr/bin/lua",
1741 "/usr/bin/pwsh",
1742 "/usr/bin/tclsh",
1743 "/usr/bin/env groovy",
1744 "/usr/bin/env elixir",
1745 "/usr/bin/env Rscript",
1746 "/usr/bin/env julia",
1747 "/usr/bin/nu",
1748 "/usr/bin/dart",
1749 ] {
1750 let line = format!("#!{name}\n");
1751 let lang = super::super::shebang::language_for_shebang(&line)
1752 .unwrap_or_else(|| panic!("expected an interpreter mapping for {line:?}"));
1753 assert!(
1754 registry.find_by_name(lang).is_some(),
1755 "interpreter mapping {line:?} → {lang:?} must resolve to a catalog grammar",
1756 );
1757 }
1758 }
1759
1760 #[test]
1761 fn test_find_by_path_detects_shebang_only_interpreters() {
1762 let registry = GrammarRegistry::default();
1766 let cases = [
1767 ("#!/usr/bin/fish\n", "fish"),
1768 ("#!/usr/bin/lua\n", "lua"),
1769 ("#!/usr/bin/pwsh\n", "powershell"),
1770 ("#!/usr/bin/tclsh\n", "tcl"),
1771 ("#!/usr/bin/env groovy\n", "groovy"),
1772 ("#!/usr/bin/env elixir\n", "elixir"),
1773 ("#!/usr/bin/env Rscript\n", "r"),
1774 ];
1775 for (first_line, expected_id) in cases {
1776 let entry = registry
1777 .find_by_path(Path::new("scriptfile"), Some(first_line))
1778 .unwrap_or_else(|| panic!("no grammar for {first_line:?}"));
1779 assert_eq!(
1780 entry.language_id, expected_id,
1781 "shebang {first_line:?} should detect {expected_id:?}, got {:?}",
1782 entry.language_id,
1783 );
1784 }
1785 }
1786
1787 #[test]
1788 fn test_find_by_path_extension_still_wins_over_shebang() {
1789 let registry = GrammarRegistry::default();
1792 let entry = registry
1793 .find_by_path(Path::new("script.py"), Some("#!/bin/sh\n"))
1794 .unwrap();
1795 assert_eq!(entry.language_id, "python");
1796 }
1797
1798 #[test]
1799 fn test_pkgbuild_detection() {
1800 let registry = GrammarRegistry::default();
1801
1802 for filename in ["PKGBUILD", "APKBUILD"] {
1804 let path = Path::new(filename);
1805 let result = registry.find_syntax_for_file(path);
1806 assert!(
1807 result.is_some(),
1808 "{} should be detected as a syntax",
1809 filename
1810 );
1811 let syntax = result.unwrap();
1812 assert!(
1814 syntax.name.to_lowercase().contains("bash")
1815 || syntax.name.to_lowercase().contains("shell"),
1816 "{} should be detected as shell/bash, got: {}",
1817 filename,
1818 syntax.name
1819 );
1820 }
1821 }
1822
1823 #[test]
1824 fn test_find_syntax_with_glob_filenames() {
1825 let mut registry = GrammarRegistry::default();
1826 let mut languages = std::collections::HashMap::new();
1827 languages.insert(
1828 "shell-configs".to_string(),
1829 crate::config::LanguageConfig {
1830 extensions: vec!["sh".to_string()],
1831 filenames: vec!["*.conf".to_string(), "*rc".to_string()],
1832 grammar: "bash".to_string(),
1833 comment_prefix: Some("#".to_string()),
1834 auto_indent: true,
1835 auto_close: None,
1836 auto_surround: None,
1837 textmate_grammar: None,
1838 show_whitespace_tabs: true,
1839 line_wrap: None,
1840 wrap_column: None,
1841 page_view: None,
1842 page_width: None,
1843 use_tabs: None,
1844 tab_size: None,
1845 formatter: None,
1846 format_on_save: false,
1847 on_save: vec![],
1848 word_characters: None,
1849 indent: None,
1850 },
1851 );
1852 registry.apply_language_config(&languages);
1853
1854 assert!(
1855 registry
1856 .find_by_path(Path::new("nftables.conf"), None)
1857 .is_some(),
1858 "*.conf should match nftables.conf"
1859 );
1860 assert!(
1861 registry.find_by_path(Path::new("lfrc"), None).is_some(),
1862 "*rc should match lfrc"
1863 );
1864 let _ = registry.find_by_path(Path::new("randomfile"), None);
1866 }
1867
1868 #[test]
1869 fn test_find_syntax_with_path_glob_filenames() {
1870 let mut registry = GrammarRegistry::default();
1871 let mut languages = std::collections::HashMap::new();
1872 languages.insert(
1873 "shell-configs".to_string(),
1874 crate::config::LanguageConfig {
1875 extensions: vec!["sh".to_string()],
1876 filenames: vec!["/etc/**/rc.*".to_string()],
1877 grammar: "bash".to_string(),
1878 comment_prefix: Some("#".to_string()),
1879 auto_indent: true,
1880 auto_close: None,
1881 auto_surround: None,
1882 textmate_grammar: None,
1883 show_whitespace_tabs: true,
1884 line_wrap: None,
1885 wrap_column: None,
1886 page_view: None,
1887 page_width: None,
1888 use_tabs: None,
1889 tab_size: None,
1890 formatter: None,
1891 format_on_save: false,
1892 on_save: vec![],
1893 word_characters: None,
1894 indent: None,
1895 },
1896 );
1897 registry.apply_language_config(&languages);
1898
1899 assert!(
1900 registry
1901 .find_by_path(Path::new("/etc/rc.conf"), None)
1902 .is_some(),
1903 "/etc/**/rc.* should match /etc/rc.conf"
1904 );
1905 assert!(
1906 registry
1907 .find_by_path(Path::new("/etc/init/rc.local"), None)
1908 .is_some(),
1909 "/etc/**/rc.* should match /etc/init/rc.local"
1910 );
1911 let _ = registry.find_by_path(Path::new("/var/rc.conf"), None);
1912 }
1913
1914 #[test]
1915 fn test_exact_filename_takes_priority_over_glob() {
1916 let mut registry = GrammarRegistry::default();
1917 let mut languages = std::collections::HashMap::new();
1918
1919 languages.insert(
1921 "custom-lfrc".to_string(),
1922 crate::config::LanguageConfig {
1923 extensions: vec![],
1924 filenames: vec!["lfrc".to_string()],
1925 grammar: "python".to_string(),
1926 comment_prefix: Some("#".to_string()),
1927 auto_indent: true,
1928 auto_close: None,
1929 auto_surround: None,
1930 textmate_grammar: None,
1931 show_whitespace_tabs: true,
1932 line_wrap: None,
1933 wrap_column: None,
1934 page_view: None,
1935 page_width: None,
1936 use_tabs: None,
1937 tab_size: None,
1938 formatter: None,
1939 format_on_save: false,
1940 on_save: vec![],
1941 word_characters: None,
1942 indent: None,
1943 },
1944 );
1945
1946 languages.insert(
1948 "rc-files".to_string(),
1949 crate::config::LanguageConfig {
1950 extensions: vec![],
1951 filenames: vec!["*rc".to_string()],
1952 grammar: "bash".to_string(),
1953 comment_prefix: Some("#".to_string()),
1954 auto_indent: true,
1955 auto_close: None,
1956 auto_surround: None,
1957 textmate_grammar: None,
1958 show_whitespace_tabs: true,
1959 line_wrap: None,
1960 wrap_column: None,
1961 page_view: None,
1962 page_width: None,
1963 use_tabs: None,
1964 tab_size: None,
1965 formatter: None,
1966 format_on_save: false,
1967 on_save: vec![],
1968 word_characters: None,
1969 indent: None,
1970 },
1971 );
1972
1973 registry.apply_language_config(&languages);
1974
1975 let entry = registry.find_by_path(Path::new("lfrc"), None).unwrap();
1977 assert!(
1978 entry.display_name.to_lowercase().contains("python"),
1979 "exact match should win over glob, got: {}",
1980 entry.display_name
1981 );
1982 }
1983
1984 #[test]
1985 fn test_built_in_aliases_resolve() {
1986 let registry = GrammarRegistry::default();
1987
1988 let syntax = registry.find_syntax_by_name("bash");
1990 assert!(syntax.is_some(), "alias 'bash' should resolve");
1991 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
1992
1993 let syntax = registry.find_syntax_by_name("cpp");
1995 assert!(syntax.is_some(), "alias 'cpp' should resolve");
1996 assert_eq!(syntax.unwrap().name, "C++");
1997
1998 let syntax = registry.find_syntax_by_name("csharp");
2000 assert!(syntax.is_some(), "alias 'csharp' should resolve");
2001 assert_eq!(syntax.unwrap().name, "C#");
2002
2003 let syntax = registry.find_syntax_by_name("sh");
2005 assert!(syntax.is_some(), "alias 'sh' should resolve");
2006 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
2007
2008 let syntax = registry.find_syntax_by_name("proto");
2010 assert!(syntax.is_some(), "alias 'proto' should resolve");
2011 assert_eq!(syntax.unwrap().name, "Protocol Buffers");
2012 }
2013
2014 #[test]
2015 fn test_alias_case_insensitive_input() {
2016 let registry = GrammarRegistry::default();
2017
2018 let syntax = registry.find_syntax_by_name("BASH");
2020 assert!(
2021 syntax.is_some(),
2022 "alias 'BASH' should resolve case-insensitively"
2023 );
2024 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
2025
2026 let syntax = registry.find_syntax_by_name("Cpp");
2027 assert!(
2028 syntax.is_some(),
2029 "alias 'Cpp' should resolve case-insensitively"
2030 );
2031 assert_eq!(syntax.unwrap().name, "C++");
2032 }
2033
2034 #[test]
2035 fn test_full_name_still_works() {
2036 let registry = GrammarRegistry::default();
2037
2038 let syntax = registry.find_syntax_by_name("Bourne Again Shell (bash)");
2040 assert!(syntax.is_some(), "full name should still resolve");
2041 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
2042
2043 let syntax = registry.find_syntax_by_name("bourne again shell (bash)");
2045 assert!(
2046 syntax.is_some(),
2047 "case-insensitive full name should resolve"
2048 );
2049 assert_eq!(syntax.unwrap().name, "Bourne Again Shell (bash)");
2050 }
2051
2052 #[test]
2053 fn test_alias_does_not_shadow_full_names() {
2054 let registry = GrammarRegistry::default();
2055
2056 let syntax = registry.find_syntax_by_name("rust");
2058 assert!(syntax.is_some());
2059 assert_eq!(syntax.unwrap().name, "Rust");
2060
2061 let syntax = registry.find_syntax_by_name("go");
2063 assert!(syntax.is_some());
2064 assert_eq!(syntax.unwrap().name, "Go");
2065 }
2066
2067 #[test]
2068 fn test_register_alias_rejects_collision() {
2069 let mut registry = GrammarRegistry::default();
2070
2071 assert!(registry.register_alias("myalias", "Rust"));
2073 assert!(!registry.register_alias("myalias", "Go"));
2074
2075 assert!(registry.register_alias("myalias", "Rust"));
2077 }
2078
2079 #[test]
2080 fn test_register_alias_rejects_nonexistent_target() {
2081 let mut registry = GrammarRegistry::default();
2082 assert!(!registry.register_alias("nope", "Nonexistent Grammar"));
2083 }
2084
2085 #[test]
2086 fn test_register_alias_skips_existing_grammar_name() {
2087 let mut registry = GrammarRegistry::default();
2088
2089 assert!(!registry.register_alias("rust", "Rust"));
2091 assert!(registry.find_syntax_by_name("rust").is_some());
2093 }
2094
2095 #[test]
2096 fn test_available_grammar_info_includes_short_names() {
2097 let registry = GrammarRegistry::default();
2098 let infos = registry.available_grammar_info();
2099
2100 let bash_info = infos.iter().find(|g| g.name == "Bourne Again Shell (bash)");
2101 assert!(bash_info.is_some(), "bash grammar should be in the list");
2102 let bash_info = bash_info.unwrap();
2103 assert!(
2104 bash_info.short_name.is_some(),
2105 "bash grammar should have a short_name"
2106 );
2107 assert_eq!(bash_info.short_name.as_deref(), Some("sh"));
2109 }
2110
2111 #[test]
2112 fn test_catalog_contains_each_language_once() {
2113 let registry = GrammarRegistry::default();
2114 let catalog = registry.catalog();
2115
2116 let mut seen = std::collections::HashSet::new();
2118 for entry in catalog {
2119 let key = entry.display_name.to_lowercase();
2120 assert!(
2121 seen.insert(key.clone()),
2122 "duplicate catalog entry for display_name={:?}",
2123 entry.display_name
2124 );
2125 }
2126
2127 let ts = registry
2130 .find_by_name("TypeScript")
2131 .expect("TypeScript must be in the catalog");
2132 assert!(ts.engines.syntect.is_none());
2133 assert_eq!(
2134 ts.engines.tree_sitter,
2135 Some(fresh_languages::Language::TypeScript)
2136 );
2137 assert_eq!(ts.language_id, "typescript");
2138 assert!(ts.extensions.iter().any(|e| e == "ts"));
2139
2140 for name in ["Rust", "Python"] {
2143 let entry = registry
2144 .find_by_name(name)
2145 .unwrap_or_else(|| panic!("{} must be in the catalog", name));
2146 assert!(
2147 entry.engines.syntect.is_some(),
2148 "{} should have a syntect index",
2149 name
2150 );
2151 assert!(
2152 entry.engines.tree_sitter.is_some(),
2153 "{} should also have a tree-sitter language",
2154 name
2155 );
2156 let by_id = registry
2159 .find_by_name(&entry.language_id)
2160 .expect("language_id should resolve");
2161 assert_eq!(by_id.display_name, entry.display_name);
2162 }
2163
2164 let js = registry
2170 .find_by_name("JavaScript")
2171 .expect("JavaScript must be in the catalog");
2172 assert!(
2173 js.engines.syntect.is_none(),
2174 "JavaScript must not be routed to the syntect engine (issue #899)"
2175 );
2176 assert_eq!(
2177 js.engines.tree_sitter,
2178 Some(fresh_languages::Language::JavaScript),
2179 "JavaScript must carry the tree-sitter language"
2180 );
2181
2182 let gdscript = registry
2183 .find_by_path(Path::new("player.gd"), None)
2184 .expect("player.gd should resolve to GDScript");
2185 assert_eq!(gdscript.display_name, "GDScript");
2186 assert_eq!(gdscript.language_id, "gdscript");
2187 assert!(
2188 gdscript.engines.syntect.is_some(),
2189 "GDScript should use the embedded Syntect grammar"
2190 );
2191 assert!(
2192 gdscript.engines.tree_sitter.is_none(),
2193 "GDScript must not carry a tree-sitter parser"
2194 );
2195 }
2196
2197 #[test]
2198 fn test_catalog_find_by_path_and_extension() {
2199 let registry = GrammarRegistry::default();
2200 let ts = registry
2201 .find_by_path(Path::new("foo.ts"), None)
2202 .expect("foo.ts should resolve");
2203 assert_eq!(ts.display_name, "TypeScript");
2204 let rs = registry.find_by_extension("rs").expect("rs should resolve");
2205 assert_eq!(rs.display_name, "Rust");
2206 }
2207
2208 #[test]
2209 fn test_smali_embedded_grammar_loads_and_resolves() {
2210 let syntax = SyntaxDefinition::load_from_str(SMALI_GRAMMAR, true, Some("Smali"))
2211 .expect("Smali grammar should parse");
2212 assert!(syntax.file_extensions.iter().any(|ext| ext == "smali"));
2213
2214 let registry = GrammarRegistry::default();
2215 let entry = registry
2216 .find_by_path(Path::new("MainActivity.smali"), None)
2217 .expect("Smali files should resolve");
2218 assert_eq!(entry.display_name, "Smali");
2219 assert!(entry.engines.syntect.is_some());
2220 assert!(entry.engines.tree_sitter.is_none());
2221 }
2222
2223 fn lang_cfg(
2225 grammar: &str,
2226 extensions: &[&str],
2227 filenames: &[&str],
2228 ) -> crate::config::LanguageConfig {
2229 crate::config::LanguageConfig {
2230 extensions: extensions.iter().map(|s| s.to_string()).collect(),
2231 filenames: filenames.iter().map(|s| s.to_string()).collect(),
2232 grammar: grammar.to_string(),
2233 comment_prefix: None,
2234 auto_indent: true,
2235 auto_close: None,
2236 auto_surround: None,
2237 textmate_grammar: None,
2238 show_whitespace_tabs: true,
2239 line_wrap: None,
2240 wrap_column: None,
2241 page_view: None,
2242 page_width: None,
2243 use_tabs: None,
2244 tab_size: None,
2245 formatter: None,
2246 format_on_save: false,
2247 on_save: vec![],
2248 word_characters: None,
2249 indent: None,
2250 }
2251 }
2252
2253 #[test]
2257 fn test_user_alias_resolves_via_find_by_name() {
2258 let mut registry = GrammarRegistry::default();
2259 let mut languages = std::collections::HashMap::new();
2260 languages.insert("mylang".to_string(), lang_cfg("Rust", &[], &[]));
2261 registry.apply_language_config(&languages);
2262
2263 let entry = registry
2264 .find_by_name("mylang")
2265 .expect("user-declared alias 'mylang' must resolve");
2266 assert_eq!(entry.display_name, "Rust");
2267 }
2268
2269 #[test]
2273 fn test_register_alias_preserves_applied_language_config() {
2274 let mut registry = GrammarRegistry::default();
2275 let mut languages = std::collections::HashMap::new();
2276 languages.insert(
2277 "shell-configs".to_string(),
2278 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2279 );
2280 registry.apply_language_config(&languages);
2281
2282 assert!(registry.find_by_extension("myconf").is_some());
2284 assert!(
2285 registry
2286 .find_by_path(Path::new("foo.myconf"), None)
2287 .is_some(),
2288 "glob should match before register_alias"
2289 );
2290
2291 registry.register_alias("mycustom", "Rust");
2293
2294 assert!(
2295 registry.find_by_extension("myconf").is_some(),
2296 "config extension must survive register_alias"
2297 );
2298 assert!(
2299 registry
2300 .find_by_path(Path::new("foo.myconf"), None)
2301 .is_some(),
2302 "glob must survive register_alias"
2303 );
2304 }
2305
2306 #[test]
2310 fn test_from_syntax_name_preserves_canonical_display_name() {
2311 use crate::primitives::detected_language::DetectedLanguage;
2312 let registry = GrammarRegistry::default();
2313 let languages = std::collections::HashMap::new();
2314
2315 let detected = DetectedLanguage::from_syntax_name("BASH", ®istry, &languages)
2316 .expect("BASH should resolve via alias");
2317 assert_eq!(
2318 detected.display_name, "Bourne Again Shell (bash)",
2319 "display_name must be canonical, not user-typed"
2320 );
2321 }
2322
2323 #[test]
2327 fn test_config_only_language_appears_in_catalog() {
2328 let mut registry = GrammarRegistry::default();
2329 let mut languages = std::collections::HashMap::new();
2330 languages.insert("elvish".to_string(), lang_cfg("elvish", &["elv"], &[]));
2331 registry.apply_language_config(&languages);
2332
2333 let entry = registry
2334 .find_by_name("elvish")
2335 .expect("elvish should be in the catalog after apply_language_config");
2336 assert!(entry.engines.syntect.is_none());
2337 assert!(entry.engines.tree_sitter.is_none());
2338 assert_eq!(entry.language_id, "elvish");
2339 assert!(entry.extensions.iter().any(|e| e == "elv"));
2340 }
2341
2342 #[test]
2343 fn test_fish_extension_resolves_to_fish_grammar_not_bash() {
2344 let registry = GrammarRegistry::default();
2347 let entry = registry
2348 .find_by_extension("fish")
2349 .expect(".fish should resolve to a grammar entry");
2350
2351 assert_eq!(entry.language_id, "fish");
2352 assert_eq!(entry.display_name, "Fish");
2353 assert!(entry.engines.syntect.is_some());
2354 }
2355
2356 #[test]
2361 fn test_config_extension_overrides_builtin() {
2362 let mut registry = GrammarRegistry::default();
2363 assert_eq!(
2365 registry.find_by_extension("js").unwrap().display_name,
2366 "JavaScript"
2367 );
2368
2369 let mut languages = std::collections::HashMap::new();
2370 languages.insert(
2371 "ts-overlay".to_string(),
2372 lang_cfg("TypeScript", &["js"], &[]),
2373 );
2374 registry.apply_language_config(&languages);
2375
2376 assert_eq!(
2377 registry.find_by_extension("js").unwrap().display_name,
2378 "TypeScript",
2379 "user-config extension must win over built-in"
2380 );
2381 }
2382
2383 #[test]
2390 fn test_bare_filename_resolves_via_find_by_path() {
2391 let registry = GrammarRegistry::default();
2392 for (filename, expected_substr) in [
2393 ("Gemfile", "ruby"),
2394 ("Rakefile", "ruby"),
2395 ("Vagrantfile", "ruby"),
2396 ("Makefile", "makefile"),
2397 ("GNUmakefile", "makefile"),
2398 ] {
2399 let entry = registry
2400 .find_by_path(Path::new(filename), None)
2401 .unwrap_or_else(|| panic!("{} must resolve via catalog", filename));
2402 assert!(
2403 entry.display_name.to_lowercase().contains(expected_substr),
2404 "{} should resolve to {} grammar, got {}",
2405 filename,
2406 expected_substr,
2407 entry.display_name
2408 );
2409 }
2410 }
2411
2412 #[test]
2417 fn test_jsx_resolves_to_javascript() {
2418 let registry = GrammarRegistry::default();
2419 let entry = registry
2420 .find_by_path(Path::new("foo.jsx"), None)
2421 .expect("foo.jsx must resolve");
2422 assert_eq!(entry.display_name, "JavaScript");
2423 }
2424
2425 #[test]
2430 fn test_rebuild_catalog_replays_language_config() {
2431 let mut registry = GrammarRegistry::default();
2432 let mut languages = std::collections::HashMap::new();
2433 languages.insert(
2434 "myshell".to_string(),
2435 lang_cfg("bash", &["myext"], &["*.myglob"]),
2436 );
2437 registry.apply_language_config(&languages);
2438 assert!(registry.find_by_extension("myext").is_some());
2439 assert!(registry
2440 .find_by_path(Path::new("foo.myglob"), None)
2441 .is_some());
2442
2443 registry.rebuild_catalog();
2446 assert!(
2447 registry.find_by_extension("myext").is_some(),
2448 "rebuild_catalog must replay applied user config"
2449 );
2450 assert!(
2451 registry
2452 .find_by_path(Path::new("foo.myglob"), None)
2453 .is_some(),
2454 "rebuild_catalog must replay user globs"
2455 );
2456 }
2457
2458 #[test]
2461 fn test_apply_language_config_idempotent() {
2462 let mut registry = GrammarRegistry::default();
2463 let mut languages = std::collections::HashMap::new();
2464 languages.insert(
2465 "shell-cfg".to_string(),
2466 lang_cfg("bash", &["myconf"], &["*.myconf"]),
2467 );
2468
2469 registry.apply_language_config(&languages);
2470 let first_extensions = registry
2471 .find_by_name("bash")
2472 .unwrap()
2473 .extensions
2474 .iter()
2475 .filter(|e| e == &"myconf")
2476 .count();
2477 let first_globs = registry
2478 .find_by_name("bash")
2479 .unwrap()
2480 .filename_globs
2481 .iter()
2482 .filter(|g| g == &"*.myconf")
2483 .count();
2484 assert_eq!(first_extensions, 1);
2485 assert_eq!(first_globs, 1);
2486
2487 registry.apply_language_config(&languages);
2489 let second_extensions = registry
2490 .find_by_name("bash")
2491 .unwrap()
2492 .extensions
2493 .iter()
2494 .filter(|e| e == &"myconf")
2495 .count();
2496 let second_globs = registry
2497 .find_by_name("bash")
2498 .unwrap()
2499 .filename_globs
2500 .iter()
2501 .filter(|g| g == &"*.myconf")
2502 .count();
2503 assert_eq!(second_extensions, 1, "extensions must not duplicate");
2504 assert_eq!(second_globs, 1, "globs must not duplicate");
2505 }
2506
2507 #[test]
2513 fn test_julia_adjoint_does_not_start_string() {
2514 use syntect::parsing::{ParseState, ScopeStack};
2515
2516 let registry = GrammarRegistry::default();
2517 let syntax_set = registry.syntax_set();
2518 let syntax = registry
2519 .find_syntax_by_name("Julia")
2520 .expect("Julia grammar must be loaded");
2521 let mut state = ParseState::new(syntax);
2522 let mut scopes = ScopeStack::new();
2523
2524 let lines = ["x = A'\n", "function foo()\n", "end\n"];
2526 let mut keyword_line_in_string = false;
2527 let mut found_function_keyword = false;
2528
2529 for line in &lines {
2530 let ops = state.parse_line(line, syntax_set).unwrap();
2531 let mut op_iter = ops.iter().peekable();
2533 for (byte_idx, _) in line.char_indices() {
2534 while let Some((offset, op)) = op_iter.peek() {
2535 if *offset <= byte_idx {
2536 scopes.apply(op).unwrap();
2537 op_iter.next();
2538 } else {
2539 break;
2540 }
2541 }
2542 let in_string = scopes
2543 .as_slice()
2544 .iter()
2545 .any(|s| s.build_string().starts_with("string."));
2546 let is_function_kw = line[byte_idx..].starts_with("function");
2547 if is_function_kw && in_string {
2548 keyword_line_in_string = true;
2549 }
2550 if is_function_kw && !in_string {
2551 found_function_keyword = true;
2552 }
2553 }
2554 for (_, op) in op_iter {
2556 scopes.apply(op).unwrap();
2557 }
2558 }
2559
2560 assert!(
2561 !keyword_line_in_string,
2562 "the `function` keyword after an adjoint operator must not be inside a string scope"
2563 );
2564 assert!(
2565 found_function_keyword,
2566 "test harness must have reached the `function` keyword"
2567 );
2568 }
2569
2570 #[test]
2573 fn test_julia_char_literal_is_recognized() {
2574 use syntect::parsing::{ParseState, ScopeStack};
2575
2576 let registry = GrammarRegistry::default();
2577 let syntax_set = registry.syntax_set();
2578 let syntax = registry
2579 .find_syntax_by_name("Julia")
2580 .expect("Julia grammar must be loaded");
2581 let mut state = ParseState::new(syntax);
2582 let mut scopes = ScopeStack::new();
2583
2584 let line = "x = 'a'\n";
2585 let ops = state.parse_line(line, syntax_set).unwrap();
2586 let mut saw_constant_or_string_at_quote = false;
2587 let mut op_iter = ops.iter().peekable();
2588 for (byte_idx, _) in line.char_indices() {
2589 while let Some((offset, op)) = op_iter.peek() {
2590 if *offset <= byte_idx {
2591 scopes.apply(op).unwrap();
2592 op_iter.next();
2593 } else {
2594 break;
2595 }
2596 }
2597 if byte_idx == 5 {
2598 let scoped = scopes.as_slice().iter().any(|s| {
2600 let str = s.build_string();
2601 str.starts_with("constant.") || str.starts_with("string.")
2602 });
2603 if scoped {
2604 saw_constant_or_string_at_quote = true;
2605 }
2606 }
2607 }
2608 assert!(
2609 saw_constant_or_string_at_quote,
2610 "char literal 'a' must receive a constant/string scope"
2611 );
2612 }
2613
2614 #[test]
2618 fn test_tree_sitter_bridge() {
2619 assert_eq!(
2620 tree_sitter_for_syntect_name("Bourne Again Shell (bash)"),
2621 Some(fresh_languages::Language::Bash)
2622 );
2623 assert_eq!(
2624 tree_sitter_for_syntect_name("Rust"),
2625 Some(fresh_languages::Language::Rust)
2626 );
2627 assert_eq!(tree_sitter_for_syntect_name("GDScript"), None);
2628 assert_eq!(tree_sitter_for_syntect_name("Nushell"), None);
2630 assert_eq!(tree_sitter_for_syntect_name("does-not-exist"), None);
2632 }
2633}